source("eossa_new.r")
## Loading required package: svd
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## WARNING: Rssa was compiled without FFTW support.
## The speed of the routines will be slower as well.
## 
## Attaching package: 'Rssa'
## The following object is masked from 'package:stats':
## 
##     decompose
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stats)
library(readr)
library(stats)
library(Rssa)
library(hpfilter)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(lattice)
library(quadprog)
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:Rssa':
## 
##     decompose
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(urca)
library(tseries)

set.seed(52)

1 Косинус с периодом, совпадающим с решёткой

date <- 1:169
ts_cos <- ts(cos(2 * pi * date / 13 + 2), frequency = 13)

plot.ts(ts_cos, type="l")

spec.pgram(ts_cos, taper = 0, log='no', fast = FALSE)

Явно выделенный пик.

2 Косинус с периодом, не совпадающим с решёткой

date <- 1:150
ts_cos_2 <- ts(cos(2 * pi * date / 13 + 2), frequency = 13)

plot.ts(ts_cos_2, type="l")

spec.pgram(ts_cos_2, taper = 0, log='no', fast = FALSE)

Видим растекание периодограммы.

3 Белый шум

ts_white <- ts(rnorm(1000))
plot.ts(ts_white, type="l")

spec.pgram(ts_white, taper = 0, log='no', fast = FALSE)

Видно, что значения абсолютно случайны, с близким к экспоненциальному распределению.

4 Красный шум

generate_red_noise_ar1 <- function(n, phi, sigma = 1, x0 = rnorm(1, 0, 1 / sqrt(1 - phi ** 2))) {
  x <- numeric(n)
  x[1] <- x0
  
  for (i in 2:n) {
    x[i] <- phi * x[i - 1] + rnorm(1, mean = 0, sd = sigma)
  }
  
  x
}

n <- 1000
phi <- 0.9
red_noise <- generate_red_noise_ar1(n, phi)
ts_red <- ts(red_noise)

plot.ts(ts_red, type="l")

spec.pgram(ts_red, taper = 0, log='no', fast = FALSE)

5 Увеличение длины ряда

Возьмём \(2\) косинуса и красный шум и будем менять длину ряда:

for (i in 1:8) {
  last <- i * 100
  ts_w_cos <- ts(0.8 * cos(2 * pi * (1:(last)) / 10 + 3) + cos(2 * pi * (1:(last)) / 20 + 3) + 0.5 * generate_red_noise_ar1(last, phi) + rnorm(last))
  spec.pgram(ts_w_cos, taper = 0, log='no', fast = FALSE)
}

Видим увеличение пиков сезонности по сравнению с шумовыми компонентами.

6 Реальные данные

6.1 Данные по электричеству

Данные по выработке электричеству в США:

electricity_data <- read_csv("electricity_data.csv")
## New names:
## Rows: 255 Columns: 311
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (55): Connecticut : electric utility, Connecticut : all commercial, Ma... dbl
## (255): United States : all sectors, United States : electric utility, U... date
## (1): ...1
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
electricity_data <- electricity_data[1:252, ]
ts_electr_1 <- ts(as.vector(electricity_data$`United States : all sectors`), frequency = 12)
plot.ts(ts_electr_1, type="l")

spec.pgram(ts_electr_1, taper = 0, log='no', fast = FALSE, detrend = TRUE)

Видим отчётливые пики, то есть наблюдается периодичность.

Можно рассмотреть другой ряд:

electricity_data$electr <- electricity_data$`United States : independent power producers`
ts_electr_2 <- ts(as.vector(electricity_data$electr), frequency = 12)
plot.ts(ts_electr_2, type="l")

spec_electr <- spec.pgram(ts_electr_2, taper = 0, log='no', fast = FALSE, detrend = TRUE)

Здесь чуть лучше виден тренд.

7 Выделение тренда полиномиальной регрессией

Возьмём аппроксимацию ряда полиномом 4-ой степени:

lm_electr <- lm(electr ~ poly(...1, 4, raw = TRUE), data = electricity_data)
summary(lm_electr)
## 
## Call:
## lm(formula = electr ~ poly(...1, 4, raw = TRUE), data = electricity_data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -22253  -9052  -3439   6268  39320 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                -8.144e+06  2.484e+06  -3.278  0.00119 **
## poly(...1, 4, raw = TRUE)1  2.083e+03  6.725e+02   3.098  0.00217 **
## poly(...1, 4, raw = TRUE)2 -1.960e-01  6.769e-02  -2.896  0.00412 **
## poly(...1, 4, raw = TRUE)3  8.153e-06  3.003e-06   2.715  0.00710 **
## poly(...1, 4, raw = TRUE)4 -1.262e-10  4.956e-11  -2.547  0.01149 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12960 on 247 degrees of freedom
## Multiple R-squared:  0.5973, Adjusted R-squared:  0.5907 
## F-statistic: 91.57 on 4 and 247 DF,  p-value: < 2.2e-16
ts_electr_poly <- ts(predict(lm_electr), frequency = 12)
plot(ts_electr_2, type = "l")
lines(ts_electr_poly)

8 Loess

8.1 Выделяем тренд обычным Loess

Применим локальную регрессию с разным параметрами сглаживания:

electricity_data$index <- 1:nrow(electricity_data) 
loess_electr_10 <- loess(electr ~ index, data = electricity_data, span = 0.1)
loess_electr_30 <- loess(electr ~ index, data = electricity_data, span = 0.3)
loess_electr_50 <- loess(electr ~ index, data = electricity_data, span = 0.5)
ts_electr_loess_10 <- ts(predict(loess_electr_10), frequency = 12)
ts_electr_loess_30 <- ts(predict(loess_electr_30), frequency = 12)
ts_electr_loess_50 <- ts(predict(loess_electr_50), frequency = 12)
plot(ts_electr_2, type = "l")
lines(ts_electr_loess_10, col = "red")
lines(ts_electr_loess_30, col = "blue")
lines(ts_electr_loess_50, col = "green")
legend(x="bottomright", c("time series", "0.1 span", "0.3 span", "0.5 span"), col = c("black", "red", "blue", "green"), lty = 1, lwd = 1)

\(0.3\) — оптимальный параметр для ряда.

8.2 Работа с выбросами

Добавим к данным два выброса и учтём веса для этих наблюдений:

electr_data_outliers <- data.frame(index = electricity_data$index, electr = electricity_data$electr)
electr_data_outliers[25, 2] <- 50000
electr_data_outliers[200, 2] <- 50000
loess_electr_out_30 <- loess(electr ~ index, data = electr_data_outliers, family="symmetric", span = 0.3, control = loess.control(surface = "direct", iterations = 1))
loess_electr_out_noweight_30 <- loess(electr ~ index, data = electr_data_outliers, family="symmetric", span = 0.3, control = loess.control(surface = "direct", iterations = 25))
weight_loess <- rep(1, nrow(electr_data_outliers))
weight_loess[25] <- 0.1
weight_loess[200] <- 0.1
loess_electr_out_off_30 <- loess(electr ~ index, data = electr_data_outliers, family="symmetric", weights = weight_loess, span = 0.3)
ts_electr_2_out <- ts(electr_data_outliers$electr, frequency = 12)
ts_electr_loess_out_30 <- ts(predict(loess_electr_out_30), frequency = 12)
ts_electr_loess_out_noweight_30 <- ts(predict(loess_electr_out_noweight_30), frequency = 12)
ts_electr_loess_out_off_30 <- ts(predict(loess_electr_out_off_30), frequency = 12)
plot(ts_electr_2_out, type = "l")
lines(ts_electr_loess_out_noweight_30, col = "purple")
lines(ts_electr_loess_out_30, col = "blue")
lines(ts_electr_loess_out_off_30, col = "green")
legend(x = "bottomright", c("original", "iterative", "no iterative", "weight"), col = c("black", "purple", "blue", "green"), lty = 1, lwd = 1)

plot(loess_electr_out_30$residuals, type = "l", main = "no iterative")

plot(loess_electr_out_noweight_30$residuals, type = "l", main = "iterative")

plot(loess_electr_out_off_30$residuals, type = "l", main = "weight")

Учёт весов (ручной (weight) и автоматический (iterative)) действительно дал близкий к правильному результат.

9 HP-фильтр

Применим фильтр Ходрика-Прескота для выделения тренда:

y = data.frame(electr = electricity_data$electr)
ts_electr_hp_1 = ts(hp2(y, lambda = 14400)$electr, frequency = 12)
ts_electr_hp_2 = ts(hp2(y, lambda = 129606)$electr, frequency = 12)
plot(ts_electr_2, type="l")
lines(ts_electr_hp_1, col="red")
lines(ts_electr_hp_2, col="blue")
legend(x="bottomright", c("time series", "lambda = 14400", "lambda = 129606"), col = c("black", "red", "blue"), lty = 1, lwd = 1)

\(\lambda = 14400\) — оптимальный параметр, судя по поведению ряда.

10 Автокорреляционная функция

10.1 Белый шум

acf(ts_white)

10.2 Красный шум

acf(ts_red)

10.3 Косинус

acf(ts_cos)

10.4 Линейный тренд

acf(0.5 * (1:100) - 10)

10.5 Реальные данные (электричество)

acf(ts_electr_2)

Тренд + сезонность.

11 Амплитудно-частотная характеристика

afc <- function(filter, omega) {
  k <- seq_along(filter) - 1
  h <- function(o) sum(rev(filter) * exp(-k*1i * o))
  abs(sapply(omega, h))
}

freq <- seq(0, pi, 0.001)
omega <- freq/2/pi

11.1 Скользящее среднее

filt <- rep(1, 10)
plot(afc(filt, freq) ~ omega, type = "l")

Применим к косинусу:

ts_cos_filter <- stats::filter(ts_cos, rep(1, 5) / 5, sides = 2)
ts_cos_filter_shift <- stats::filter(ts_cos, rep(1, 5) / 5, sides = 1)
plot(ts_cos, type = "l", col = "gray", lwd = 1, main = "Фильтрация временного ряда", ylab = "Значение", xlab = "Время")
lines(ts_cos_filter, col = "red", lwd = 2)
lines(ts_cos_filter_shift, col = "blue", lwd = 2)

Видно смещение у одностороннего фильтра и уменьшение амплитуды у обоих.

Фильтр с продлением на концы:

filter_continue <- function(time_series, coef_filter, normalize = TRUE){
  n <- length(time_series)
  k <- length(coef_filter)
  filtered_series <- numeric(n)
  
  for (i in 1:n) {
    left_bound <- max(1, i - (k - 1) %/% 2)
    right_bound <- min(n, i + k %/% 2)
    
    sub_series <- time_series[left_bound:right_bound]
    sub_filter <- coef_filter[(left_bound - i + (k - 1) %/% 2 + 1):(right_bound - i + (k - 1) %/% 2 + 1)]
    
    if (normalize) {
      sub_filter <- sub_filter / sum(abs(sub_filter))
    }
    
    filtered_series[i] <- sum(sub_series * sub_filter)
  }
  
  filtered_series
}

Теперь к периодограмме белого шума:

ts_pgram_white <- ts(spec.pgram(ts_white, taper = 0, log='no', fast = FALSE, plot = FALSE)$spec)
ts_pgram_white_filter <- filter_continue(ts_pgram_white, rep(1, 20))
ts_pgram_white_filter_80 <- filter_continue(ts_pgram_white, rep(1, 80))
plot(ts_pgram_white)
lines(ts_pgram_white_filter, type = "l", col = "blue", lwd = 1)
lines(ts_pgram_white_filter_80, type = "l", col = "red", lwd = 1)

К периодограмме красного шума:

ts_red_7 <- ts(generate_red_noise_ar1(n, 0.7))
ts_pgram_red <- ts(spec.pgram(ts_red_7, taper = 0, log='no', fast = FALSE, plot = FALSE)$spec)
ts_pgram_red_filter <- filter_continue(ts_pgram_red, rep(1, 40))
plot(ts_pgram_red)
lines(ts_pgram_red_filter, type = "l", col = "blue", lwd = 1)

11.2 Последовательные разности

filt <- c(-1,1)
plot(afc(filt, freq) ~ omega, type = "l")

Попробуем применить фильтр к ряду с сезонностью и трендом:

ts_lin_sin <- ts(1/2 * (1:100) + cos(2 * pi * (1:100) / 5))
plot(ts_lin_sin)

ts_lin_sin_filter <- filter_continue(ts_lin_sin, c(-1, 1))
plot(ts_lin_sin_filter, type = "l")

pgram_lin_sin <- spec.pgram(ts_lin_sin, taper = 0, log='no', fast = FALSE, demean = FALSE, detrend = FALSE)

pgram_lin_sin_filter_culc <- pgram_lin_sin
pgram_lin_sin_filter_culc$spec <- pgram_lin_sin$spec * afc(c(-1, 1), seq(pi / 50, pi, pi / 50)) ** 2
pgram_lin_sin_filter <- spec.pgram(ts_lin_sin_filter, taper = 0, log='no', fast = FALSE, demean = FALSE, detrend = FALSE)

plot(pgram_lin_sin_filter_culc$spec, type = "l")

Видим, что фильтр действительно убирает тренд.

11.3 Реальные данные (электричество)

ts_electr_2_filter_24_1 <- ts(filter_continue(ts_electr_2, c(1 / 2, rep(1, 23), 1 / 2)), frequency = 12)
ts_electr_2_filter_24_2 <- ts(filter_continue(ts_electr_2_filter_24_1, c(1 / 2, rep(1, 23), 1 / 2)), frequency = 12)
ts_electr_2_filter_34 <- ts(filter_continue(ts_electr_2, c(1 / 2, rep(1, 47), 1 / 2)), frequency = 12)
plot(ts_electr_2, type = "l", col = "gray", lwd = 1, main = "Фильтрация временного ряда", ylab = "Значение", xlab = "Время")
lines(ts_electr_2_filter_24_1, col = "red", lwd = 2)
lines(ts_electr_2_filter_34, col = "blue", lwd = 2)
lines(ts_electr_2_filter_24_2, col = "green", lwd = 2)
legend(x = "bottomright", c("original", "filter 24", "filter 34", "filter 24 (x2)"), col = c("gray", "red", "blue", "green"), lty =  1, lwd = c(1, 2, 2, 2))

spec.pgram(ts_electr_2_filter_24_2, taper = 0, log='no', fast = FALSE, detrend = TRUE, na.action = na.omit)

Тренд явно выделился по сравнению с прошлой периодограммой.

12 SSA

12.1 Экспонента

ts_exp <- ts(1.1 ** (1:100))
s_exp <- ssa(ts_exp, kind = "1d-ssa", svd.method = "svd")

plot(ts_exp)

plot(s_exp)

plot(s_exp, type = "vectors", idx = 1:3)

plot(s_exp, type = "paired", idx = 1:3, plot.contrib = FALSE)

plot(wcor(s_exp, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_exp <- reconstruct(s_exp, 
                      groups = list(Trend = 1))
plot(r_exp, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Ранг ряда - 1.

12.2 Полином

ts_poli <- ts(2 * (1:169) ^2 - 3 * (1:169) + 1)
s_poli <- ssa(ts_poli, kind = "1d-ssa", svd.method = "svd")

plot(ts_poli)

plot(s_poli)

plot(s_poli, type = "vectors", idx = 1:5)

plot(s_poli, type = "series", groups = list(1, 1:2, 1:3))

plot(s_poli, type = "paired", idx = 1:3, plot.contrib = FALSE)

plot(wcor(s_poli, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_poli <- reconstruct(s_poli, 
                      groups = list(Trend = 1:3))
plot(r_poli, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

s_poli$sigma[1:5]
## [1] 1.738257e+06 1.298326e+05 7.595883e+03 2.711291e-10 1.654407e-10

Ранг ряда - степень полинома + 1.

12.3 Косинус

s_cos <- ssa(ts_cos, kind = "1d-ssa", svd.method = "svd")

plot(ts_cos)

plot(s_cos)

plot(s_cos, type = "vectors", idx = 1:3)

plot(s_exp, type = "series", groups = 1:10)

plot(s_cos, type = "paired", idx = 1:3, plot.contrib = FALSE)

plot(wcor(s_cos, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos <- reconstruct(s_cos, 
                      groups = list(Seasonality = 1:2))
plot(r_cos, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Ранг ряда - 2.

12.4 Произведение косинуса и полинома

ts_cos_poli <- ts(cos(2 * pi * (1:169) / 13) * (2 * (1:169) ** 2 - 3 * (1:169) + 1))
s_cos_poli <- ssa(ts_cos_poli, kind = "1d-ssa", svd.method = "svd")

plot(ts_cos_poli)

plot(s_cos_poli)

plot(s_cos_poli, type = "vectors", idx = 1:7)

plot(s_cos_poli, type = "paired", idx = 1:6, plot.contrib = FALSE)

plot(wcor(s_cos_poli, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_poli <- reconstruct(s_cos_poli, 
                      groups = list(Seasonality = 1:6))
plot(r_cos_poli, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Ранг ряда - 2 (косинус) умножить на 3 (полином) = 6.

12.5 Сумма косинусов (точная разделимость)

ts_cos_cos <- ts(cos(2 * pi * (1:143) / 11) + 4 * cos(2 * pi * (1:143) / 13))
s_cos_cos <- ssa(ts_cos_cos, kind = "1d-ssa", svd.method = "svd")

plot(ts_cos_cos)

plot(s_cos_cos)

plot(s_cos_cos, type = "vectors", idx = 1:5)

plot(s_cos_cos, type = "paired", idx = 1:6, plot.contrib = FALSE)

plot(wcor(s_cos_cos, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_cos <- reconstruct(s_cos_cos, 
                      groups = list(Seasonality = 1:4))
plot(r_cos_cos, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Видна точная разделимость. Но если амплитуды равны:

ts_cos_cos_2 <- ts(cos(2 * pi * (1:143) / 11) + cos(2 * pi * (1:143) / 13))
s_cos_cos_2 <- ssa(ts_cos_cos_2, kind = "1d-ssa", svd.method = "svd")

plot(ts_cos_cos_2)

plot(s_cos_cos_2)

plot(s_cos_cos_2, type = "vectors", idx = 1:5)

plot(s_cos_cos_2, type = "paired", idx = 1:6, plot.contrib = FALSE)

plot(wcor(s_cos_cos_2, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_cos_2 <- reconstruct(s_cos_cos_2, 
                      groups = list(Seasonality = 1:4))
plot(r_cos_cos_2, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

То точная разделимость пропадает.

12.6 Косинус плюс линейный тренд

ts_cos_lin <- 5 * ts_cos + (1.5 * (1:169) - 2)
s_cos_lin <- ssa(ts_cos_lin, L = 78, kind = "1d-ssa", svd.method = "svd")

plot(ts_cos_lin)

plot(s_cos_lin)

plot(s_cos_lin, type = "vectors", idx = 1:5)

plot(s_cos_lin, type = "paired", idx = 1:4, plot.contrib = FALSE)

plot(wcor(s_cos_lin, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_lin <- reconstruct(s_cos_lin, 
                      groups = list(Trend = 1:2, Seasonality = 3:4))
plot(r_cos_lin, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Теоретически точной разделимости линейного ряда и косинуса нет, но в данном случае длина ряда позволяет разделить их.

12.7 Реальные данные (электричество)

L_electr <- length(ts_electr_2) %/% 2 %/% 12 * 12
s_electr <- ssa(ts_electr_2, kind = "1d-ssa", svd.method = "svd", L = L_electr)

plot(ts_electr_2)

plot(s_electr)

plot(s_electr, type = "vectors", idx = 1:15)

plot(s_electr, type = "series", groups = list(c(1, 6), c(2:5, 7:8, 9:10)))

plot(s_electr, type = "paired", idx = 1:15, plot.contrib = FALSE)

plot(wcor(s_electr, groups = 1:30),
      scales = list(at = c(10, 20, 30)))

r_electr <- reconstruct(s_electr, 
                      groups = list(Trend = c(1, 6), Seasonality = c(2:5, 7:8, 9:10)))
plot(r_electr, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

spec.pgram(residuals(r_electr), taper = 0, log='no', fast = FALSE, demean = FALSE, detrend = FALSE)

Видно, что осталась какая-то сезонность и тренд, но обнаружить руками это достаточно трудно.

12.8 Автоматическая группировка

Попробуем автоматическую группировку для тренда и сезонности:

coef <- c(1 - 0.02, 1 + 0.02)
freq.bins.seas = list(trend = 0.05, s12 = 1/12 * coef, s6 = 1/6 * coef,
                      s4 = 1/4 * coef, s3 = 1/3 * coef,
                      s2.4 = 1/2.4 * coef, s2 = 1/2 * coef)
g_electr <- grouping.auto(s_electr, base = "series", 
                    freq.bins = freq.bins.seas, 
                    threshold = 0.7, groups = 1:20)
print(g_electr$trend)
## [1]  1  6 14
plot(g_electr, order = TRUE, type = "b")

r_electr_auto <- reconstruct(s_electr, g_electr)
plot(r_electr_auto, plot.method = "xyplot", superpose = TRUE, 
     add.residuals = FALSE)

Видим, что тренд неплохо выделился, но, возможно, немного смешался с сезонностью. А по спектрограмме видно, что нижние и основные частоты были убраны:

spec_electr_auto <- spec.pgram(residuals(r_electr_auto), taper = 0, log='no', fast = FALSE, demean = FALSE, detrend = FALSE, plot = FALSE)
w.pay <- seq(0, length.out = length(spec_electr$spec), 
             by = 1/length(ts_electr_2)) 
xyplot(log(spec_electr$spec) + log(spec_electr_auto$spec) ~ w.pay, 
       type = "l", xlab = NULL, ylab = NULL)

12.9 Fossa

Применим улучшение разделимости DerivSSA:

plot(s_electr, type = "vectors", idx = 1:20)

fss_electr <- fossa(s_electr, nested.groups = 1:16, gamma = 10)
plot(fss_electr, type = "vectors", idx = 1:20)

plot(fss_electr, type = "series", groups = list(13:16, 1:12))

coef <- c(1 - 0.02, 1 + 0.02)
freq.bins.seas = list(trend = 0.05, s12 = 1/12 * coef, s6 = 1/6 * coef,
                      s4 = 1/4 * coef, s3 = 1/3 * coef,
                      s2.4 = 1/2.4 * coef, s2 = 1/2 * coef)
g_electr_fossa <- grouping.auto(fss_electr, base = "series", 
                    freq.bins = freq.bins.seas, 
                    threshold = 0.7, groups = 1:20)
print(g_electr_fossa$trend)
## [1] 13 14 15 16
plot(g_electr_fossa, order = TRUE, type = "b")

r_electr_auto_fossa <- reconstruct(fss_electr, g_electr_fossa)
plot(r_electr_auto_fossa, plot.method = "xyplot", superpose = TRUE, 
     add.residuals = FALSE)

Немного лучше, но идеально не стало.

13 SSA с проекциями

Смоделируем полиномиальный ряд со степенью \(4\):

N_poli_4 <- 239
freq_12 <- 12
poli_trend <- 3 * ((1:N_poli_4) / 100) ** 4 - 5.5 * ((1:N_poli_4) / 100) ** 3 - 3 * ((1:N_poli_4) / 100) ** 2 + 1.5 * ((1:N_poli_4) / 100) + 15
ts_poli_4_poli <- ts(poli_trend, frequency = freq_12)
ts_poli_4 <- ts(poli_trend + 3 * cos(2 * pi * (1:N_poli_4) / freq_12 + pi / 3) + rnorm(N_poli_4), frequency = freq_12)
plot(ts_poli_4)

Попробуем выделить тренд обычным SSA:

L_poli_4 <- N_poli_4 %/% 2 %/% 12 * 12
s_poli_4 <- ssa(ts_poli_4, kind = "1d-ssa", svd.method = "svd", L = L_poli_4)

plot(s_poli_4)

plot(s_poli_4, type = "vectors", idx = 1:8)

plot(s_poli_4, type = "series", groups = list(c(1, 2, 5), c(3, 4)))

plot(wcor(s_poli_4, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_poli_4 <- reconstruct(s_poli_4, 
                      groups = list(Trend = c(1, 2, 5), Seasonality = c(3, 4)))
plot(r_poli_4, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

Видим, что тренд смешался с сезонностью или шумом. Теперь попробуем применить SSA с проекциями:

r <- 4
s_poli_4_proj <- ssa(ts_poli_4, L = L_poli_4, column.projector = 3, row.projector = 2)
r_poli_4_proj <- reconstruct(s_poli_4_proj, groups = 
                      list(Trend = seq_len(nspecial(s_poli_4_proj))))
fit_poli_4 <- ts(r_poli_4_proj$Trend, frequency = 12)
fit_poli_4_reg <- ts(predict(lm(fit_poli_4 ~ poly((1:length(ts_poli_4)), r, raw = TRUE))), frequency = 12)

plot(ts_poli_4)
lines(ts_poli_4_poli, col = "red")
lines(fit_poli_4, col = "blue")
lines(fit_poli_4_reg, col = "green")
legend(x = "top", c("original", "poly reg", "SSA proj", "SSA proj reg"), col = c("black", "red", "blue", "green"), lty = 1, lwd = 1)

Видим, что тренд действительно намного лучше выделился и почти совпал с реальным трендом.

13.1 Реальные данные (электричество)

Мы видели, что тренд ряда по электричеству неплохо аппроксимировался 4-ой степенью. Попробуем тогда применить SSA с проекциями для выделения полиномиального тренда:

r <- 4
s_electr_proj <- ssa(ts_electr_2, L = L_electr, column.projector = 3, row.projector = 2)
r_electr_proj <- reconstruct(s_electr_proj, groups = 
                      list(Trend = seq_len(nspecial(s_electr_proj))))
fit_electr <- ts(r_electr_proj$Trend, frequency = 12)
fit_electr_reg <- ts(predict(lm(fit_electr ~ poly((1:length(ts_electr_2)), r, raw = TRUE))), frequency = 12)

plot(ts_electr_2)
lines(ts_electr_poly, col = "red")
lines(fit_electr, col = "blue")
lines(fit_electr_reg, col = "green")
legend(x = "bottomright", c("original", "poly reg", "SSA proj", "SSA proj reg"), col = c("black", "red", "blue", "green"), lty = 1, lwd = 1)

14 SSA как линейный фильтр

Функция, вычисляющая коэффициенты линейного фильтра на основе SSA:

make_filter_ssa <- function(u, L) {
  k <- length(u)
  
  mat1 <- t(matrix(rep(u[1:L], each = L), nrow = L))
  mat2 <- matrix(0, nrow = L, ncol = L)
  for (j in 0:(L - 1)) {
    mat2[j + 1, ] <- c(u[(j + 1):L], rep(0, j))
  }
  
  res <- colSums(mat1 * mat2)
  
  c(rev(res[-1]), res) / L
}

В результате, если взять точки от \(L\) до \(N - L + 1\), то получим нужный результат:

L <- 26
N <- 169
freq <- 13

s_cos_lin <- ssa(ts_cos_lin, L = L, kind = "1d-ssa", svd.method = "svd")
ts_cos_lin_filter_sum <- ts(rep(0, N), frequency = freq)
color_plot <- c("red", "blue", "green", "purple")

plot(ts_cos_lin)

for (i in 1:4) {
  u <- s_cos_lin$U[, i]
  filter_ssa <- make_filter_ssa(u, L)
  ts_cos_lin_filter <- ts(filter_continue(ts_cos_lin, filter_ssa, FALSE), frequency = freq)
  ts_cos_lin_filter_sum <- ts_cos_lin_filter_sum + ts_cos_lin_filter
  lines(ts_cos_lin_filter, col = color_plot[i])
}

lines(ts_cos_lin_filter_sum, col = "orange")
abline(v = (N - L) / freq + 1, col = "gray", lty = 2)
abline(v = L / freq + 1, col = "gray", lty = 2)
legend(x = "topleft", c("original", "1", "2", "3", "4", "sum"), col = c("black", color_plot, "orange"), lty = 1, lwd = 1)

Посмотрим на АЧХ фильтра SSA для синуса при разных \(L\):

L_vec <- c(13, 26, 39, 52)
freq <- seq(0, pi, 0.001)
omega <- freq/2/pi
u <- ssa(ts_cos_lin, L = L_vec[1], kind = "1d-ssa", svd.method = "svd")$U[, 3]
plot(omega, afc(make_filter_ssa(u, L_vec[1]), freq), type = "l", ylab = "Frequency response")
u <- ssa(ts_cos_lin, L = L_vec[2], kind = "1d-ssa", svd.method = "svd")$U[, 3]
lines(omega, afc(make_filter_ssa(u, L_vec[2]), freq), type = "l", col = "red")
u <- ssa(ts_cos_lin, L = L_vec[3], kind = "1d-ssa", svd.method = "svd")$U[, 3]
lines(omega, afc(make_filter_ssa(u, L_vec[3]), freq), type = "l", col = "blue")
u <- ssa(ts_cos_lin, L = L_vec[4], kind = "1d-ssa", svd.method = "svd")$U[, 3]
lines(omega, afc(make_filter_ssa(u, L_vec[4]), freq), type = "l", col = "green")
legend(x="topright", c("L = 13", "L = 26", "L = 39", "L = 52"), col = c("black", "red", "blue", "green"), lty = 1, lwd = 1)

15 Построение огибающей

Амплитуда, модулированная косинусом с большим периодом:

ts_cos_cos_long <- ts(cos(2 * pi * (1:197) / 100) * cos(2 * pi * (1:197) / 6))
plot(ts_cos_cos_long)

ts_cos_cos_sq <- 2 * ts_cos_cos_long ** 2
plot(ts_cos_cos_sq)

s_cos_cos_sq <- ssa(ts_cos_cos_sq, L = 96, kind = "1d-ssa", svd.method = "svd")

plot(s_cos_cos_sq)

plot(s_cos_cos_sq, type = "vectors", idx = 1:10)

plot(s_cos_cos_sq, type = "series", groups = list(c(1, 2, 5)))

plot(wcor(s_cos_cos_sq, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_cos_sq <- reconstruct(s_cos_cos_sq, 
                      groups = list(Trend = c(1, 2, 5)))
plot(r_cos_cos_sq, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

plot(ts_cos_cos_long)
lines(sqrt(r_cos_cos_sq$Trend))

Амплитуда — корень:

ts_cos_sqrt <- ts(sqrt(1:197) * cos(2 * pi * (1:197) / 6))
plot(ts_cos_sqrt)

ts_cos_sqrt_sq <- 2 * ts_cos_sqrt ** 2
plot(ts_cos_sqrt_sq)

s_cos_sqrt_sq <- ssa(ts_cos_sqrt_sq, L = 96, kind = "1d-ssa", svd.method = "svd")

plot(s_cos_sqrt_sq)

plot(s_cos_sqrt_sq, type = "vectors", idx = 1:10)

plot(s_cos_sqrt_sq, type = "series", groups = list(c(1, 4)))

plot(wcor(s_cos_sqrt_sq, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_sqrt_sq <- reconstruct(s_cos_sqrt_sq, 
                      groups = list(Trend = c(1, 4)))
plot(r_cos_sqrt_sq, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

plot(ts_cos_sqrt)
lines(sqrt(r_cos_sqrt_sq$Trend))

16 Выделение гетерогедостичности

N <- 300
sigma <- (5 + 3 * sin(2 * pi * (1:N) / 100))
ts_getero_noise <- ts(sigma * rnorm(N))
plot(ts_getero_noise)
lines(sigma)

ts_getero_noise_sq <- ts_getero_noise ** 2
plot(ts_getero_noise_sq)

s_getero_noise_sq <- ssa(ts_getero_noise_sq, kind = "1d-ssa", svd.method = "svd")

plot(s_getero_noise_sq)

plot(s_getero_noise_sq, type = "vectors", idx = 1:10)

plot(s_getero_noise_sq, type = "series", groups = list(c(1, 2, 3)))

plot(wcor(s_getero_noise_sq, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_getero_noise_sq <- reconstruct(s_getero_noise_sq, 
                      groups = list(Trend = c(1, 2, 3)))
plot(r_getero_noise_sq, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

rebuild_sigma <- sqrt(r_getero_noise_sq$Trend)
plot(ts_getero_noise)
lines(rebuild_sigma, col = "blue")
lines(-rebuild_sigma, col = "blue")
lines(2 * rebuild_sigma, col = "red")
lines(-2 * rebuild_sigma, col = "red")

17 Box-Cox transformation

Для реального ряда электричества мы получаем, что параметр Box-cox преобразования очень близок к \(0.5\), что похоже на пуассоновский шум:

lambda <- BoxCox.lambda(ts_electr_2, method = "guerrero")
print(lambda)
## [1] 0.5172697
ts_electr_transform <- BoxCox(ts_electr_2, lambda)
plot(ts_electr_transform, type = "l")

s_electr_transform <- ssa(ts_electr_transform, kind = "1d-ssa", svd.method = "svd")

plot(s_electr_transform)

plot(s_electr_transform, type = "vectors", idx = 1:12)

plot(wcor(s_electr_transform, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_electr_transform <- reconstruct(s_electr_transform, 
                      groups = list(Trend = c(1, 6), Seasonality = c(2:5, 7:10)))
plot(r_electr_transform, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

trend_electr_transform <- InvBoxCox(r_electr_transform$Trend, lambda)
plot(ts_electr_2)
lines(trend_electr_transform, col = "red")
lines(r_electr$Trend, col = "blue")
legend(x = "bottomright", legend = c("SSA + BC", "SSA"), lty = 1, col = c("red", "blue"))

Большой разницы нет.

18 Сравнение всех фильтров

Тренд, полученный SSA, усреднением, аппроксимацией, loess и т.д.:

plot(ts_electr_2)
lines(r_electr$Trend, col="red")
lines(r_electr_auto$trend, col="orange")
lines(r_electr_auto_fossa$trend, col="tomato4")
lines(fit_electr_reg, col="darkcyan")
lines(ts_electr_2_filter_24_2, col="blue")
lines(ts_electr_poly, col="green")
lines(ts_electr_loess_50, col="purple")
lines(ts_electr_hp_1, col="aquamarine")
legend(x = "bottomright", legend = c("SSA", "SSA auto", "FOSSA", "SSA poly reg", "Filter", "Polinom", "Loess", "HP"), lty = 1, col = c("red", "orange", "tomato4", "darkcyan", "blue", "green", "purple", "aquamarine"))

19 Линейные реккурентные формулы

19.1 Числа Фибоначчи (ЛРФ из ряда)

Смоделируем ряд Фибоначчи Линейной реккурентной формулой:

lrr_generate <- function(coef, first_members, n) {
  res <- c(first_members)
  k <- length(coef)
  
  for (i in (k + 1):n) {
    res <- c(res, sum(res[(i - k):(i - 1)] * coef))
  }
  
  res
}
N_fib <- 10
first_members_fib <- c(1, 1)
coef_fib <- c(1, 1)
fb_series <- lrr_generate(coef_fib, first_members_fib, N_fib)
cat("Fibonacci original series: ", fb_series, "\n")
## Fibonacci original series:  1 1 2 3 5 8 13 21 34 55

Теперь попробуем обратно из ряда получить коэффициенты ЛРФ. Для чего сначала составим траекторную матрицу:

create_tr_matrix <- function(series, L) {
  N <- length(series)
  K <- N - L + 1
  X <- matrix(nrow = L, ncol = K)

  for (i in 1:L) {
    for (j in 1:K) {
      X[i, j] <- series[j + i - 1]
    }
  }
  
  X
}
L_fib <- 5
X_fib <- create_tr_matrix(fb_series, L = L_fib)
X_fib
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    2    3    5    8
## [2,]    1    2    3    5    8   13
## [3,]    2    3    5    8   13   21
## [4,]    3    5    8   13   21   34
## [5,]    5    8   13   21   34   55

Наконец найдём вектор \(B\) (ортогональный пространству столбцов матрицы, и последний элемент который равен \(-1\)):

find_min_norm_null_vector <- function(X) {
  X <- t(X)
  svd_res <- svd(X)
  V <- svd_res$v
  small_singular <- which(svd_res$d < sqrt(.Machine$double.eps))

  if (length(small_singular) == 0) {
    stop("У матрицы нет ненулевого нулевого пространства")
  }

  Null_basis <- V[, small_singular, drop = FALSE]

  dim_null <- ncol(Null_basis)

  Dmat <- t(Null_basis) %*% Null_basis
  dvec <- rep(0, dim_null)

  Aeq <- matrix(0, nrow = 1, ncol = dim_null)
  Aeq[1, ] <- Null_basis[nrow(Null_basis), ]
  beq <- -1

  sol <- solve.QP(Dmat, dvec, t(Aeq), beq, meq = 1)

  B <- Null_basis %*% sol$solution

  B
}
B_fib <- find_min_norm_null_vector(X_fib)
cat("B:\n")
## B:
print(B_fib)
##            [,1]
## [1,]  0.3333333
## [2,]  0.3333333
## [3,]  0.6666667
## [4,]  1.0000000
## [5,] -1.0000000

Проверим насколько эти коэффициенты соответствуют нашему ряду:

coef_fib_re <- B_fib[1:(length(B_fib) - 1)]
fb_series_re <- lrr_generate(coef_fib_re, fb_series[1:length(coef_fib_re)], N_fib)
cat("Fibonacci restored series: ", fb_series_re, "\n")
## Fibonacci restored series:  1 1 2 3 5 8 13 21 34 55

Соответствуют полностью!

19.2 Числа Фибоначчи (явная формула из ЛРФ)

Используем характеристические функции для поиска явной формулы ряда:

cluster_complex_numbers <- function(values, epsilon = 1e-4) {
  n <- length(values)
  adj_matrix <- matrix(0, n, n)

  if (n > 1) {
    for (i in 1:(n - 1)) {
      for (j in (i+1):n) {
        if (Mod(values[i] - values[j]) <= epsilon) {
          adj_matrix[i, j] <- 1
          adj_matrix[j, i] <- 1
        }
      }
    } 
  }

  g <- graph_from_adjacency_matrix(adj_matrix, mode = "undirected")
  clusters <- components(g)$membership

  unique_clusters <- unique(clusters)
  results <- list()

  for (cl in unique_clusters) {
    group_values <- values[clusters == cl]
    mean_value <- mean(group_values)
    group_size <- length(group_values)
    
    results <- append(results, list(list(mean = mean_value, size = group_size)))
  }

  return(results)
}

find_explicit_formula <- function(coeffs, initial_terms) {
  k <- length(coeffs)
  poly_roots <- polyroot(c(-coeffs, 1))
  roots_group <- cluster_complex_numbers(poly_roots)
  
  basis <- matrix(0, nrow = k, ncol = k)
  
  for (i in 1:k) {
    t <- 1
    
    for (elem in roots_group) {
      roots_group_size <- elem$size
      roots_group_mean <- elem$mean
      
      for (l in 1:roots_group_size) {
        basis[i, t] <- i ** (l - 1) * roots_group_mean ** i
        t <- t + 1
      }
    }
  }
  
  A <- ginv(basis) %*% initial_terms
  
  t <- 1
  
  for (j in 1:length(roots_group)) {
    roots_group_size <- roots_group[[j]]$size
    
    roots_group[[j]]$coef <- A[t:(t + roots_group_size - 1)]
    t <- t + roots_group_size
  }
  
  roots_group
}
list_formula_fib <- find_explicit_formula(coef_fib, first_members_fib)
list_formula_fib
## [[1]]
## [[1]]$mean
## [1] -0.618034-1.722177e-18i
## 
## [[1]]$size
## [1] 1
## 
## [[1]]$coef
## [1] -0.4472136-7.701809e-19i
## 
## 
## [[2]]
## [[2]]$mean
## [1] 1.618034+1.722177e-18i
## 
## [[2]]$size
## [1] 1
## 
## [[2]]$coef
## [1] 0.4472136-1.568657e-18i

Как это выглядит в виде математической формулы:

round_complex <- function(x, digits = 4) {
  round(Re(x), digits) + round(Im(x), digits) * 1i
}

create_str_formula <- function(list_formula) {
  res <- "x _n = "
  first_plus <- TRUE
  
  for (elem in list_formula) {
    if (first_plus) {
      first_plus <- FALSE
    } 
    else {
      res <- paste(res, " + ", sep = "")
    }
    
    list_formula_size <- elem$size
    list_formula_mean <- elem$mean
    list_formula_coef <- elem$coef
    
    res <- paste(res, "(", sep = "")
    
    for (i in 1:list_formula_size) {
      if (i != 1) {
        res <- paste(res, " + ", sep = "")
      }
      res <- paste(res, "(", toString(round_complex(list_formula_coef[i])), ")", sep = "")
      
      if (i != 1){
        res <- paste(res, " * n", sep = "")
      }
      
      if (i > 2) {
        res <- paste(res, " ^(", toString(i - 1), ")", sep = "")
      }
    }
    
    res <- paste(res, ") * (", toString(round_complex(list_formula_mean)), ") ^n", sep = "")
  }
  
  res
}
create_str_formula(list_formula_fib)
## [1] "x _n = ((-0.4472+0i)) * (-0.618+0i) ^n + ((0.4472+0i)) * (1.618+0i) ^n"

Наконец, заново смоделируем числа Фибоначчи с помощью явной формулы:

model_elem_by_formula <- function(list_formula, n) {
  res <- 0
  
  for (elem in list_formula) {
    list_formula_size <- elem$size
    list_formula_mean <- elem$mean
    list_formula_coef <- elem$coef
    
    brace_sum <- 0
    
    for (i in 1:list_formula_size) {
      brace_sum <- brace_sum + list_formula_coef[i] * n ** (i - 1)
    }
    
    res <- res + brace_sum * list_formula_mean ** n
  }
  
  res
}

model_series_by_formula <- function(list_formula, size) {
  res <- c()
  
  for (i in 1:size) {
    res <- c(res, Re(model_elem_by_formula(list_formula, i)))  
  }
  
  res
}
fb_series_formula <- model_series_by_formula(list_formula_fib, N_fib)
fb_series_formula
##  [1]  1  1  2  3  5  8 13 21 34 55

19.3 Косинус

Сделаем то же самое с косинусом:

N_cos <- 10
cos_series_orig <- cos(2 * pi * (1:N_cos) / freq_12)
first_members_cos <- c(cos(2 * pi / freq_12), cos(4 * pi / freq_12))
coef_cos <- c(-1, 2 * cos(2 * pi / freq_12))
coef_cos
## [1] -1.000000  1.732051
cos_series <- lrr_generate(coef_cos, first_members_cos, N_cos)
list_formula_cos <- find_explicit_formula(coef_cos, first_members_cos)
create_str_formula(list_formula_cos)
## [1] "x _n = ((0.5+0i)) * (0.866+0.5i) ^n + ((0.5+0i)) * (0.866-0.5i) ^n"
cos_series_formula <- model_series_by_formula(list_formula_cos, N_cos)
L_cos <- 5
X_cos <- create_tr_matrix(cos_series, L = L_cos)
X_cos
##               [,1]          [,2]          [,3]       [,4]          [,5]
## [1,]  8.660254e-01  5.000000e-01  2.220446e-16 -0.5000000 -8.660254e-01
## [2,]  5.000000e-01  2.220446e-16 -5.000000e-01 -0.8660254 -1.000000e+00
## [3,]  2.220446e-16 -5.000000e-01 -8.660254e-01 -1.0000000 -8.660254e-01
## [4,] -5.000000e-01 -8.660254e-01 -1.000000e+00 -0.8660254 -5.000000e-01
## [5,] -8.660254e-01 -1.000000e+00 -8.660254e-01 -0.5000000 -8.881784e-16
##               [,6]
## [1,] -1.000000e+00
## [2,] -8.660254e-01
## [3,] -5.000000e-01
## [4,] -8.881784e-16
## [5,]  5.000000e-01
B_cos <- find_min_norm_null_vector(X_cos)
cat("B:\n")
## B:
print(B_cos)
##            [,1]
## [1,] -0.5384615
## [2,] -0.1332347
## [3,]  0.3076923
## [4,]  0.6661734
## [5,] -1.0000000
coef_cos_re <- B_cos[1:(length(B_cos) - 1)]
cos_series_re <- lrr_generate(coef_cos_re, cos_series[1:length(coef_cos_re)], N_cos)
cat("Cos restored series: ", cos_series_re, "\n")
## Cos restored series:  0.8660254 0.5 2.220446e-16 -0.5 -0.8660254 -1 -0.8660254 -0.5 -2.498002e-16 0.5
plot(cos_series_orig, type = "l", col = "red")
lines(cos_series, lty = 2, lwd = 2, col = "blue")
lines(cos_series_re, lty = 6, lwd = 2, col = "purple")
lines(cos_series_formula, lty = 3, lwd = 2, col = "green")
legend(x="top", legend = c("original", "lrr_generate", "restored", "formula"), lty = c(1, 2, 6, 3), col = c("red", "blue", "purple", "green"))

19.4 Ранг конечного ряда

Сгенерируем сложный ряд с экспонентой, полиномом и косинусом:

N_epc <- 30
epc_series <- exp(2 * (1:N_epc) / N_epc) * (2 *(2 * (1:N_epc) / N_epc) ** 2 - 3 * (2 * (1:N_epc) / N_epc) + 6) * cos(2 * pi * (1:N_epc) / freq_12 + pi / 3)
epc_series
##  [1]  3.802129e-16 -3.219680e+00 -5.796557e+00 -6.974833e+00 -6.311765e+00
##  [6] -3.819071e+00 -1.475096e-15  4.234996e+00  7.763766e+00  9.522255e+00
## [11]  8.790981e+00  5.430320e+00  3.570528e-15 -6.283965e+00 -1.177051e+01
## [16] -1.474793e+01 -1.390363e+01 -8.765109e+00 -8.227981e-15  1.053797e+01
## [21]  2.008810e+01  2.558473e+01  2.448707e+01  1.565158e+01 -4.252692e-14
## [26] -1.926741e+01 -3.709317e+01 -4.765243e+01 -4.594904e+01 -2.955622e+01

Найдём ЛРФ этого ряда:

L_epc <- 7
X_epc <- create_tr_matrix(epc_series, L = L_epc)
B_epc <- find_min_norm_null_vector(X_epc)
cat("B:\n")
## B:
print(B_epc)
##            [,1]
## [1,]  -1.491825
## [2,]   7.251815
## [3,] -15.667262
## [4,]  19.039785
## [5,] -13.711570
## [6,]   5.554371
## [7,]  -1.000000
coef_epc_re <- B_epc[1:(length(B_epc) - 1)]
first_members_epc <- epc_series[1:length(coef_epc_re)]
epc_series_re <- lrr_generate(coef_epc_re, first_members_epc, N_epc)
cat("Restored series: ", epc_series_re, "\n")
## Restored series:  3.802129e-16 -3.21968 -5.796557 -6.974833 -6.311765 -3.819071 -3.609613e-14 4.234996 7.763766 9.522255 8.790981 5.43032 1.601264e-11 -6.283965 -11.77051 -14.74793 -13.90363 -8.765109 -5.845595e-11 10.53797 20.0881 25.58473 24.48707 15.65158 1.42671e-10 -19.26741 -37.09317 -47.65243 -45.94904 -29.55622

А теперь по ЛРФ найдём обратно явную формулу:

list_formula_epc <- find_explicit_formula(coef_epc_re, first_members_epc)
create_str_formula(list_formula_epc)
## [1] "x _n = ((1.5+2.5981i) + (-0.05-0.0866i) * n + (0.0022+0.0038i) * n ^(2)) * (0.9257+0.5345i) ^n + ((1.5-2.5981i) + (-0.05+0.0866i) * n + (0.0022-0.0038i) * n ^(2)) * (0.9257-0.5345i) ^n"
epc_series_formula <- model_series_by_formula(list_formula_epc, N_epc)

Видим, что всё совпадает:

plot(epc_series, type = "l", col = "red")
lines(epc_series_re, lty = 6, lwd = 2, col = "purple")
lines(epc_series_formula, lty = 3, lwd = 2, col = "green")
legend(x="top", legend = c("original", "lrr_generate", "formula"), lty = c(1, 6, 3), col = c("red", "purple", "green"))

19.5 SSA lrr

До этого найденные формулы имели комплексные числа в своей записи, хотя выдавали вещественные результаты, то есть по сути своей составляли вещественные выражения в комплексных значениях. Перепишим прошлые функции так, чтобы они выдавали суммы произведений косинусов, степеней и полиномов.

Функция кластеризации комплексных чисел почти не изменилась:

cluster_complex_numbers_conj <- function(values, epsilon = 1e-3) {
  n <- length(values)
  adj_matrix <- matrix(0, n, n)

  if (n > 1) {
    for (i in 1:(n - 1)) {
      for (j in (i+1):n) {
        if (Mod(values[i] - values[j]) / max(Mod(values[i]), abs(Mod(values[j]))) <= epsilon) {
          adj_matrix[i, j] <- 1
          adj_matrix[j, i] <- 1
        }
      }
    } 
  }

  g <- graph_from_adjacency_matrix(adj_matrix, mode = "undirected")
  clusters <- components(g)$membership

  unique_clusters <- unique(clusters)
  results <- list()

  for (cl in unique_clusters) {
    group_values <- values[clusters == cl]
    mean_value <- mean(group_values)
    group_size <- length(group_values)
    mod_value <- Mod(mean_value)
    arg_value <- Arg(mean_value)
    period_value <- 2 * pi / arg_value
    
    results <- append(results, list(list(mean = mean_value,
                                         period = period_value,
                                         arg = arg_value,
                                         mod = mod_value,
                                         size = group_size)))
  }

  results$type <- "raw"
  results
}

В функции нахождения явной формулы изменим нахождение корней через polyroot на метод ssa, а также для периодик перепишим нахождение коэффициентов:

find_roots_simple <- function(series) {
  len_series <- length(series)
  s_series_pre <- ssa(series, L = len_series %/% 2, svd.method = "svd")
  r <- sum(s_series_pre$sigma > sqrt(.Machine$double.eps))
  s_series <- ssa(series, L = r + 1, svd.method = "svd")
  
  l_series <- lrr(s_series, groups = list(1:r))
  
  roots(l_series)
}

find_explicit_formula_conj <- function(series, roots = c(), epsilon = 1e-3) {
  len_series <- length(series)
  
  if (length(roots) == 0)
    roots_series <- find_roots_simple(series)
  else
    roots_series <- roots
  
  r <- length(roots_series)
  
  roots_group <- cluster_complex_numbers_conj(roots_series, epsilon)
  basis <- matrix(0, nrow = len_series, ncol = r)
  
  first_to_len <- 1:len_series
  t <- 1
    
  for (i in 1:(length(roots_group) - 1)) {
    elem <- roots_group[[i]]
    roots_group_size <- elem$size
    roots_group_mod <- elem$mod
    roots_group_period <- elem$period
    
    if (roots_group_period > 1e16)
      powers <- roots_group_mod ** first_to_len
    else if (roots_group_period == 2)
      powers <- (-roots_group_mod) ** first_to_len
    else if (roots_group_period > 0)
      powers <- roots_group_mod ** first_to_len * sin(2 * pi * first_to_len / roots_group_period)
    else
      powers <- roots_group_mod ** first_to_len * cos(2 * pi * first_to_len / roots_group_period)
      
    for (l in 1:roots_group_size) {
      basis[first_to_len, t] <- first_to_len ** (l - 1) * powers
      t <- t + 1
    }
  }
  
  A <- lm(series ~ 0 + basis)
  A <- A$coefficients
  names(A) <- c()
  
  t <- 1
  
  for (j in 1:(length(roots_group) - 1)) {
    roots_group_size <- roots_group[[j]]$size
    
    roots_group[[j]]$coef <- A[t:(t + roots_group_size - 1)]
    t <- t + roots_group_size 
  }
  
  roots_group
}

Аналогичные изменения периодик внесём в функции нахождения строки и моделирования ряда:

create_str_formula_conj <- function(list_formula, digits = 4) {
  res <- "x _n = "
  first_plus <- TRUE
  
  for (i in 1:(length(list_formula) - 1)) {
    elem <- list_formula[[i]]
    if (first_plus) {
      first_plus <- FALSE
    } 
    else {
      res <- paste(res, " + ", sep = "")
    }
    
    list_formula_coef <- elem$coef
    list_formula_size <- elem$size
    list_formula_mod <- elem$mod
    list_formula_period <- elem$period
    
    res <- paste(res, "(", sep = "")
    
    for (i in 1:list_formula_size) {
      if (i != 1) {
        res <- paste(res, " + (", sep = "")
      }
      res <- paste(res, toString(round(list_formula_coef[i], digits)), sep = "")
      
      if (i != 1){
        res <- paste(res, ") * n", sep = "")
      }
      
      if (i > 2) {
        res <- paste(res, " ^(", toString(i - 1), ")", sep = "")
      }
    }
    
    if (abs(list_formula_period) > 1e16)
      res <- paste(res, ") * ", toString(round(list_formula_mod, digits)), " ^n", sep = "")
    else if (list_formula_period == 2)
      res <- paste(res, ") * (", toString(-round(list_formula_mod, digits)), ") ^n", sep = "")
    else {
      res <- paste(res, ") * ", sep = "")
      
      if (round(list_formula_mod, digits) != 1) {
        res <- paste(res, toString(round(list_formula_mod, digits))," ^n * ", sep = "")
      }
      
      if (list_formula_period > 0) {
        if (list_formula$type == "raw") {
          res <- paste(res, "sin(2 * pi * n / ", toString(round(list_formula_period, digits)), ")", sep = "")
        }
        else if (list_formula$type == "compressed") {
          res <- paste(res, "cos(2 * pi * n / ", toString(round(list_formula_period, digits)), " + (", toString(round(elem$phi, digits)), "))", sep = "")
        }
      }
      else
        res <- paste(res, "cos(2 * pi * n / ", toString(abs(round(list_formula_period, digits))), ")", sep = "")
    }
  }
  
  res
}

model_elem_by_formula_conj <- function(list_formula, n) {
  res <- 0
  
  for (i in 1:(length(list_formula) - 1)) {
    elem <- list_formula[[i]]
    list_formula_coef <- elem$coef
    list_formula_size <- elem$size
    list_formula_mod <- elem$mod
    list_formula_period <- elem$period
    
    brace_sum <- 0
    
    for (i in 1:list_formula_size) {
      brace_sum <- brace_sum + list_formula_coef[i] * n ** (i - 1)
    }
    
    if (abs(list_formula_period) > 1e16)
      res <- res + brace_sum * list_formula_mod ** n
    else if (list_formula_period == 2)
    res <- res + brace_sum * (-list_formula_mod) ** n
    else if (list_formula_period > 0) {
      if (list_formula$type == "raw") {
        res <- res + brace_sum * list_formula_mod ** n * sin(2 * pi * n / list_formula_period)
      }
      else if (list_formula$type == "compressed") {
        res <- res + brace_sum * list_formula_mod ** n * cos(2 * pi * n / list_formula_period + elem$phi)
      }
    }
    else
      res <- res + brace_sum * list_formula_mod ** n * cos(2 * pi * n / list_formula_period)
  }
  
  res
}

model_series_by_formula_conj <- function(list_formula, size) {
  res <- c()
  
  for (i in 1:size) {
    res <- c(res, Re(model_elem_by_formula_conj(list_formula, i)))  
  }
  
  res
}

Функция нахождения явной формулы не объединяет синусы и косинусы с одинаковым периодом, поэтому сделаем это в отдельной формуле:

compress_sincos_to_cos <- function(list_formula) {
  list_formula_cos <- list()
  
  for (i in 1:(length(list_formula) - 1)) {
    elem <- list_formula[[i]]
    period <- elem$period
    coef <- elem$coef[1]
    
    if (elem$period < 0 & elem$period > -1e16) {
      for (j in 1:(length(list_formula) - 1)) {
        elem1 <- list_formula[[j]]
        
        if (abs(abs(period) - elem1$period) < 1e-8) {
          coef1 <- elem1$coef[1]
          phi <- -atan2(coef1, coef)
          coef_new <- elem$coef / cos(phi)
          
          list_formula_cos <- append(list_formula_cos, list(list(mean = elem$mean,
                                                                 period = abs(period),
                                                                 arg = elem$arg,
                                                                 mod = elem$mod,
                                                                 size = elem$size,
                                                                 coef = coef_new,
                                                                 phi = phi)))
        }
      }
    }
    else if (elem$period == 2 | abs(elem$period) > 1e16) {
      list_formula_cos <- append(list_formula_cos, list(list(mean = elem$mean,
                                                             period = period,
                                                             arg = elem$arg,
                                                             mod = elem$mod,
                                                             size = elem$size,
                                                             coef = elem$coef,
                                                             phi = 0)))
    }
  }
  
  list_formula_cos$type <- "compressed"
  list_formula_cos
}

В качестве примера рассмотрим прошлый сложный ряд и ещё прибавим к нему косинус с другим периодом и смещением: \[ x _n = (6 - 0.2 n + 0.0089 n ^2) \cdot e ^{2n / 30} \cos(2 \pi n / 12 + \pi / 3) + \cos(2 \pi n / 5 - \pi / 4) \]

epc_sin_series <- epc_series + cos(2 * pi * (1:N_epc) / 5 - pi / 4)
plot(epc_sin_series, type = "l")

Найдём явную формулу в виде списка параметров:

list_formula_epc_sin <- find_explicit_formula_conj(epc_sin_series)
list_formula_epc_sin
## [[1]]
## [[1]]$mean
## [1] 0.9257284+0.5344696i
## 
## [[1]]$period
## [1] 12
## 
## [[1]]$arg
## [1] 0.5235988
## 
## [[1]]$mod
## [1] 1.068939
## 
## [[1]]$size
## [1] 3
## 
## [[1]]$coef
## [1] -5.196152423  0.173205081 -0.007698004
## 
## 
## [[2]]
## [[2]]$mean
## [1] 0.9257284-0.5344696i
## 
## [[2]]$period
## [1] -12
## 
## [[2]]$arg
## [1] -0.5235988
## 
## [[2]]$mod
## [1] 1.068939
## 
## [[2]]$size
## [1] 3
## 
## [[2]]$coef
## [1]  3.000000000 -0.100000000  0.004444444
## 
## 
## [[3]]
## [[3]]$mean
## [1] 0.309017+0.9510565i
## 
## [[3]]$period
## [1] 5
## 
## [[3]]$arg
## [1] 1.256637
## 
## [[3]]$mod
## [1] 1
## 
## [[3]]$size
## [1] 1
## 
## [[3]]$coef
## [1] 0.7071068
## 
## 
## [[4]]
## [[4]]$mean
## [1] 0.309017-0.9510565i
## 
## [[4]]$period
## [1] -5
## 
## [[4]]$arg
## [1] -1.256637
## 
## [[4]]$mod
## [1] 1
## 
## [[4]]$size
## [1] 1
## 
## [[4]]$coef
## [1] 0.7071068
## 
## 
## $type
## [1] "raw"
create_str_formula_conj(list_formula_epc_sin)
## [1] "x _n = (-5.1962 + (0.1732) * n + (-0.0077) * n ^(2)) * 1.0689 ^n * sin(2 * pi * n / 12) + (3 + (-0.1) * n + (0.0044) * n ^(2)) * 1.0689 ^n * cos(2 * pi * n / 12) + (0.7071) * sin(2 * pi * n / 5) + (0.7071) * cos(2 * pi * n / 5)"

И сократим запись убрав синусы и добавив смещение:

list_formula_epc_sin_comp <- compress_sincos_to_cos(list_formula_epc_sin)
list_formula_epc_sin_comp
## [[1]]
## [[1]]$mean
## [1] 0.9257284-0.5344696i
## 
## [[1]]$period
## [1] 12
## 
## [[1]]$arg
## [1] -0.5235988
## 
## [[1]]$mod
## [1] 1.068939
## 
## [[1]]$size
## [1] 3
## 
## [[1]]$coef
## [1]  6.000000000 -0.200000000  0.008888889
## 
## [[1]]$phi
## [1] 1.047198
## 
## 
## [[2]]
## [[2]]$mean
## [1] 0.309017-0.9510565i
## 
## [[2]]$period
## [1] 5
## 
## [[2]]$arg
## [1] -1.256637
## 
## [[2]]$mod
## [1] 1
## 
## [[2]]$size
## [1] 1
## 
## [[2]]$coef
## [1] 1
## 
## [[2]]$phi
## [1] -0.7853982
## 
## 
## $type
## [1] "compressed"
create_str_formula_conj(list_formula_epc_sin_comp)
## [1] "x _n = (6 + (-0.2) * n + (0.0089) * n ^(2)) * 1.0689 ^n * cos(2 * pi * n / 12 + (1.0472)) + (1) * cos(2 * pi * n / 5 + (-0.7854))"

Полученная формула аналогична оригинальной. По найденной формуле построим ряд и сравним с оригиналом:

epc_sin_series_formula <- model_series_by_formula_conj(list_formula_epc_sin_comp, N_epc)
plot(epc_sin_series, type = "l", col = "red")
lines(epc_sin_series_formula, lty = 3, lwd = 2, col = "green")
legend(x="bottom", legend = c("original", "formula (compressed)"), lty = c(1, 3), col = c("red", "green"))

Как и ожидалось, они совпадают.

19.6 LRR с шумом

Давайте внесём немного шума в ряд. Для этого возьмём для начала ряд косинуса плюс экспонента:

N_cos_long <- 100
cos_series_long <- exp(2 * (1:N_cos_long) / N_cos_long) + cos(2 * pi * (1:N_cos_long) / 12 + pi / 3)
plot(cos_series_long, type = "l")

Внесём шум:

cos_series_long_noise <- cos_series_long + rnorm(N_cos_long, 0, 1)
plot(cos_series_long_noise, type = "l")

Посмотрим, что выдаст прошлый алгоритм. Но в первую очередь на вывод lrr после того как мы передадим ему ряд с группами найденного сигнала:

L_cos_long <- (N_cos_long %/% 2) %/% 12 * 12
s_cos_series_long_noise <- ssa(cos_series_long_noise, L = L_cos_long, svd.method = "svd")
plot(s_cos_series_long_noise, type = "vectors", idx = 1:10)

plot(s_cos_series_long_noise, type = "series", groups = list(c(1, 2, 3)))

plot(wcor(s_cos_series_long_noise, groups = 1:10),
      scales = list(at = c(10, 20, 30)))

r_cos_series_long_noise <- reconstruct(s_cos_series_long_noise, 
                      groups = list(Trend = 1, Seasonality = c(2, 3)))
plot(r_cos_series_long_noise, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

l_cos_series_long_noise <- lrr(s_cos_series_long_noise, groups = list(1:3))

plot(l_cos_series_long_noise)

Видим очень много корней, но есть те которые выделяются на фоне других. С помощью функции parestimate мы можем найти сигнальные корни из всех остальных:

par_cos_long_noise <- parestimate(s_cos_series_long_noise, groups = list(1:3), method = "esprit")
par_cos_long_noise
##    period     rate   |    Mod     Arg  |     Re        Im
##       Inf   0.018919 |  1.01910   0.00 |  1.01910   0.00000
##    12.041  -0.001495 |  0.99851   0.52 |  0.86561   0.49773
##   -12.041  -0.001495 |  0.99851  -0.52 |  0.86561  -0.49773

Осталось только передать эти корни в нашу функцию (которую, правда, надо под это переделать):

list_formula_cos_long_noise <- find_explicit_formula_conj(cos_series_long_noise, roots = par_cos_long_noise$roots)
list_formula_cos_long_noise_comp <- compress_sincos_to_cos(list_formula_cos_long_noise)
create_str_formula_conj(list_formula_cos_long_noise_comp)
## [1] "x _n = (1.0217) * 1.0191 ^n + (1.2642) * 0.9985 ^n * cos(2 * pi * n / 12.0406 + (1.1079))"
cos_series_long_noise_formula <- model_series_by_formula_conj(list_formula_cos_long_noise_comp, 100)
plot(cos_series_long_noise, type = "l", col = "red")
lines(cos_series_long, lty = 6, lwd = 2, col = "purple")
lines(cos_series_long_noise_formula, lty = 3, lwd = 2, col = "green")
legend(x="bottom", legend = c("original + noise", "original", "formula (compressed)"), lty = c(1, 6, 3), col = c("red", "purple", "green"))

19.7 Cadzow

Применим итерации Cadzow к нашему сложному модельному ряду:

series <- cos_series_long_noise
N <- length(series)
L <- L_cos_long
K <- N - L + 1
rank <- 3

s0 <- ssa(series, L = L)
r0 <- reconstruct(s0, groups = list(signal = 1:rank))$signal

alpha <- 0.1
weights <- numeric(K)
weights[1:K] <- alpha
weights[seq(from = K, to = 1, by = -L)] <- 1
s <- ssa(series, L = L, column.oblique = "identity", 
         row.oblique = weights, decompose.force = FALSE)
c <- cadzow(s, rank = rank)
sc <- ssa(c, L = rank + 1)
rc <- reconstruct(sc, groups = list(signal = 1:rank))$signal
plot(series, type = "l")
lines(cos_series_long, col = "purple")
lines(r0, col = "blue")
lines(rc, col = "red", lty = 2)
legend(x = "topleft", c("original + noise", "original", "ssa", "cadzow"), col = c("black", "purple", "blue", "red"), lty = c(1, 1, 1, 2))

Видно, что он немного отличается от ssa, причём не в лучшую сторону. Посмотрим на формулу:

list_formula_cos_long_noise_c <- find_explicit_formula_conj(rc, parestimate(sc, groups = list(1:rank), method = "esprit")$roots)
list_formula_cos_long_noise_c_comp <- compress_sincos_to_cos(list_formula_cos_long_noise_c)

cat("Original:", create_str_formula_conj(list_formula_cos_long_noise_comp), "\n")
## Original: x _n = (1.0217) * 1.0191 ^n + (1.2642) * 0.9985 ^n * cos(2 * pi * n / 12.0406 + (1.1079))
cat("Cadzow:", create_str_formula_conj(list_formula_cos_long_noise_c_comp))
## Cadzow: x _n = (1.0403) * 1.019 ^n + (1.3107) * 0.9981 ^n * cos(2 * pi * n / 11.9986 + (1.0071))

Она значительно поменялась.

19.8 Явная формула для реального ряда (электричество)

Вспомним ряд электричества и найдём для него сигнальные корни:

plot(s_electr, type = "series", groups = list(c(1, 6), c(2:5, 7:10)))

l_electr <- lrr(s_electr, groups = list(1:20))
plot(l_electr)

Берём определённую группу, чтобы не вносить шум в формулу:

par_electr <- parestimate(s_electr, groups = list(c(1:10, 14)), method = "esprit")
par_electr
##    period     rate   |    Mod     Arg  |     Re        Im
##       Inf   0.000948 |  1.00095  -0.00 |  1.00095  -0.00000
##    12.014  -0.000249 |  0.99975   0.52 |  0.86610   0.49936
##   -12.014  -0.000249 |  0.99975  -0.52 |  0.86610  -0.49936
##     5.996  -0.000659 |  0.99934   1.05 |  0.49900   0.86584
##    -5.996  -0.000659 |  0.99934  -1.05 |  0.49900  -0.86584
##     3.999  -0.001616 |  0.99839   1.57 | -0.00028   0.99839
##    -3.999  -0.001616 |  0.99839  -1.57 | -0.00028  -0.99839
##     2.399  -0.003373 |  0.99663   2.62 | -0.86382   0.49709
##    -2.399  -0.003373 |  0.99663  -2.62 | -0.86382  -0.49709
##       Inf  -0.016668 |  0.98347  -0.00 |  0.98347  -0.00000
##       Inf  -0.045323 |  0.95569   0.00 |  0.95569   0.00000

По корням строим формулу:

list_formula_electr <- find_explicit_formula_conj(ts_electr_2, roots = par_electr$roots)
list_formula_electr_comp <- compress_sincos_to_cos(list_formula_electr)
create_str_formula_conj(list_formula_electr)
## [1] "x _n = (112739.0862) * 1.0009 ^n + (-8976.8651) * 0.9998 ^n * sin(2 * pi * n / 12.0135) + (-7280.0665) * 0.9998 ^n * cos(2 * pi * n / 12.0135) + (12884.5383) * 0.9993 ^n * sin(2 * pi * n / 5.9956) + (1856.4944) * 0.9993 ^n * cos(2 * pi * n / 5.9956) + (-1088.3709) * 0.9984 ^n * sin(2 * pi * n / 3.9993) + (3749.2924) * 0.9984 ^n * cos(2 * pi * n / 3.9993) + (4316.0924) * 0.9966 ^n * sin(2 * pi * n / 2.3987) + (-422.6512) * 0.9966 ^n * cos(2 * pi * n / 2.3987) + (998.5436) * 0.9835 ^n + (-45342.0133) * 0.9557 ^n"
create_str_formula_conj(list_formula_electr_comp)
## [1] "x _n = (112739.0862) * 1.0009 ^n + (11557.8317) * 0.9998 ^n * cos(2 * pi * n / 12.0135 + (2.2522)) + (13017.5996) * 0.9993 ^n * cos(2 * pi * n / 5.9956 + (-1.4277)) + (3904.0677) * 0.9984 ^n * cos(2 * pi * n / 3.9993 + (0.2825)) + (4336.737) * 0.9966 ^n * cos(2 * pi * n / 2.3987 + (-1.6684)) + (998.5436) * 0.9835 ^n + (-45342.0133) * 0.9557 ^n"
ts_electr_formula <- ts(model_series_by_formula_conj(list_formula_electr_comp, length(ts_electr_2)), frequency = 12)
plot(ts_electr_2, type = "l", col = "red")
lines(r_electr$Trend + r_electr$Seasonality, lty = 6, lwd = 2, col = "purple")
lines(ts_electr_formula, lty = 3, lwd = 2, col = "green")
legend(x="bottom", legend = c("original", "ssa", "formula (compressed)"), lty = c(1, 3), col = c("red", "purple", "green"))

Видим, что она получилась достаточно похожей на то, что выделил SSA.

19.9 Сбор корней на примере

Проверим, что если перевернуть ряд, то несигнальные корни останутся внутри единичного круга, а сигнальные поменяют расположение. Возьмём наш сложный ряд и добавим туда экспоненту с отрицательным показателем:

N_epc <- length(epc_series)
L_epc_exp <- 8
epc_series_exp <- epc_series + exp(-4 * (1:N_epc) / N_epc)
plot(epc_series_exp, type = "l")

s_epc_series_exp <- ssa(epc_series_exp, L = N_epc %/% 2, svd.method = "svd")
l_epc_series_exp <- lrr(s_epc_series_exp, groups = list(1:(L_epc_exp - 1)))

plot(l_epc_series_exp)

А теперь перевёрнутый

s_epc_series_exp_rev <- ssa(rev(epc_series_exp), L = N_epc %/% 2, svd.method = "svd")
l_epc_series_exp_rev <- lrr(s_epc_series_exp_rev, groups = list(1:(L_epc_exp - 1)))

plot(l_epc_series_exp_rev)

Действительно корни по модулю меньшие нуля стали больше нуля, а большие — меньше.

20 IOSSA

Применим IOSSA к ряду косинуса плюс полином второй степени:

N_cos_poly <- 100
ts_cos_12 <- ts(cos(2 * pi * (1:N_cos_poly) / 12), frequency = 12)
ts_poli_2 <- ts(5 * (2 * (1:N_cos_poly) / N_cos_poly) ** 2 - 3 * ((1:N_cos_poly) / N_cos_poly) + 10, frequency = 12)
ts_cos_poly <- ts_cos_12 + ts_poli_2
plot(ts_cos_poly)

s_cos_poly <- ssa(ts_cos_poly, L = (N_cos_poly %/% 2) %/% 12 * 12, kind = "1d-ssa", svd.method = "svd")
plot(s_cos_poly, type = "vectors", idx = 1:10)

r_cos_poly <- reconstruct(s_cos_poly, 
                      groups = list(Trend = c(1, 4, 5), Seasonality = 2:3))
s_cos_poly_i <- iossa(s_cos_poly, nested.groups = list(c(1, 4, 5), c(2, 3)))
plot(s_cos_poly_i, type = "vectors", idx = 1:10)
## Warning in .contribution(x, idx, ...): Elementary matrices are not F-orthogonal
## (max F-cor is -0.0102). Contributions can be irrelevant

r_cos_poly_i <- reconstruct(s_cos_poly_i, 
                      groups = list(Trend = 1:3, Seasonality = 4:5))

plot(r_cos_poly$Trend)
lines(r_cos_poly_i$Trend, col = "red")

21 EOSSA

Применим EOSSA к ряду косинуса плюс полином второй степени:

plot(ts_cos_poly)

s_cos_poly <- ssa(ts_cos_poly, L = (N_cos_poly %/% 2) %/% 12 * 12, kind = "1d-ssa", svd.method = "svd")
plot(s_cos_poly, type = "vectors", idx = 1:10)

r_cos_poly <- reconstruct(s_cos_poly, 
                      groups = list(Trend = c(1, 4, 5), Seasonality = 2:3))
s_cos_poly_e <- eossa(s_cos_poly, nested.groups = list(c(1, 4, 5), c(2, 3)))
plot(s_cos_poly_e, type = "vectors", idx = 1:10)
## Warning in .contribution(x, idx, ...): Elementary matrices are not F-orthogonal
## (max F-cor is 0.0178). Contributions can be irrelevant

r_cos_poly_e <- reconstruct(s_cos_poly_e, 
                      groups = list(Trend = 1:3, Seasonality = 4:5))

plot(ts_poli_2)
lines(r_cos_poly$Trend, col = "red", lty = 2, lwd = 2)
lines(r_cos_poly_e$Trend, col = "blue", lty = 3, lwd = 2)
legend(x = "topleft", c("original poli", "SSA trend", "EOSSA trend"), col = c("black", "red", "blue"), lty = c(1, 2, 3), lwd = c(1, 2, 2))

plot(ts_cos_12)
lines(r_cos_poly$Seasonality, col = "red", lty = 2, lwd = 2)
lines(r_cos_poly_e$Seasonality, col = "blue", lty = 3, lwd = 2)
legend(x = "topleft", c("original cos", "SSA seasonality", "EOSSA seasonality"), col = c("black", "red", "blue"), lty = c(1, 2, 3), lwd = c(1, 2, 2))

Видим, что мы действительно разделили тренд и сезонность.

21.1 Реальные данные

Попробуем применить eossa к реальным данным по электричеству:

plot(s_electr, type = "vectors", idx = 1:20)

s_electr_e <- eossa(s_electr, nested.groups = list(c(1, 6, 11:13), c(2:5, 7:10)), k = 6)
plot(s_electr_e, type = "vectors", idx = 1:20)
## Warning in .contribution(x, idx, ...): Elementary matrices are not F-orthogonal
## (max F-cor is 0.00566). Contributions can be irrelevant

r_electr_e <- reconstruct(s_electr_e, 
                      groups = list(Trend = c(1:3), Seasonality = c(4:13)))

plot(ts_electr_2)
lines(r_electr$Trend, col = "red", lty = 2, lwd = 2)
lines(r_electr_e$Trend, col = "blue", lty = 3, lwd = 2)
legend(x = "topleft", c("original", "SSA trend", "EOSSA trend"), col = c("black", "red", "blue"), lty = c(1, 2, 3), lwd = c(1, 2, 2))

plot(r_electr$Seasonality, lty = 1, lwd = 1)
lines(r_electr_e$Seasonality, col = "red", lty = 2, lwd = 2)
legend(x = "topleft", c("SSA seasonality", "EOSSA seasonality"), col = c("black", "red"), lty = c(1, 2), lwd = c(1, 2))

Видим, что в начале тренд выделился лучше.

21.2 EOSSA (new)

Проверим новый вариант eossa на реальных данных:

s_electr_e_n <- eossa_new(s_electr, nested.groups = list(c(1, 6, 11:13), c(2:5, 7:10)), clust_type = "distance")
s_electr_e_n$iossa.groups
## $F1
## [1] 8 9
## 
## $F2
## [1] 12 13
## 
## $F3
## [1] 1 2 3
## 
## $F4
## [1] 10 11
## 
## $F5
## [1] 4 5 6 7
plot(s_electr_e_n, type = "vectors", idx = 1:20)
## Warning in .contribution(x, idx, ...): Elementary matrices are not F-orthogonal
## (max F-cor is 0.000277). Contributions can be irrelevant

r_electr_e_n <- reconstruct(s_electr_e_n, 
                      groups = list(Trend = c(1:3), Seasonality = c(4:13)))

plot(ts_electr_2)
lines(r_electr$Trend, col = "red", lty = 2, lwd = 2)
lines(r_electr_e$Trend, col = "blue", lty = 3, lwd = 2)
lines(r_electr_e_n$Trend, col = "green", lty = 6, lwd = 2)
legend(x = "topleft", c("original", "SSA trend", "EOSSA trend", "EOSSA new trend"), col = c("black", "red", "blue", "green"), lty = c(1, 2, 3, 6), lwd = c(1, 2, 2, 2))

plot(r_electr$Seasonality, col = "red", lty = 1, lwd = 1)
lines(r_electr_e$Seasonality, col = "blue", lty = 2, lwd = 2)
lines(r_electr_e_n$Seasonality, col = "green", lty = 6, lwd = 2)
legend(x = "topleft", c("SSA seasonality", "EOSSA seasonality", "EOSSA new trend"), col = c("red", "blue", "green"), lty = c(1, 2, 6), lwd = c(1, 2, 2))

То же самое.

22 Decompose

Применим декомпозицию ряда: выделив только один период 12 и периоды 10, 12 и 13:

dec_electr <- stats::decompose(ts_electr_2)
plot(dec_electr)

spec.pgram(na.exclude(dec_electr$random), taper = 0, log='no', fast = FALSE)

acf(na.exclude(dec_electr$random))

ts_electr_10 <- ts(as.vector(electricity_data$electr), frequency = 10)
dec_electr_10 <- stats::decompose(ts_electr_10)
plot(dec_electr_10)

spec.pgram(na.exclude(dec_electr_10$random), taper = 0, log='no', fast = FALSE)

acf(na.exclude(dec_electr_10$random))

ts_electr_12 <- ts(dec_electr_10$x - dec_electr_10$seasonal, frequency = 12)
dec_electr_12 <- stats::decompose(ts_electr_12)
plot(dec_electr_12)

spec.pgram(na.exclude(dec_electr_12$random), taper = 0, log='no', fast = FALSE)

acf(na.exclude(dec_electr_12$random))

ts_electr_13 <- ts(dec_electr_12$x - dec_electr_12$seasonal, frequency = 13)
dec_electr_13 <- stats::decompose(ts_electr_13)
plot(dec_electr_13)

spec.pgram(na.exclude(dec_electr_13$random), taper = 0, log='no', fast = FALSE)

acf(na.exclude(dec_electr_13$random))

Тренд для одного периода даже глаже.

Теперь применим loess декомпозицию:

stl_electr <- stl(ts_electr_2, s.window = 7, s.degree = 1)
plot(stl_electr)

spec.pgram(stl_electr$time.series[, 3], taper = 0, log='no', fast = FALSE)

acf(stl_electr$time.series[, 3])

Сравнение трендов декомпозиции, loess и SSA:

plot(as.vector(dec_electr$x), type = "l")
lines(as.vector(r_electr_auto_fossa$trend), type = "l", col = "purple")
lines(as.vector(dec_electr$trend), type = "l", col = "red")
lines(as.vector(stl_electr$time.series[, 2]), type = "l", col = "blue")
legend(x = "bottomright", c("series", "Fossa SSA", "decompose 12", "decompose 10, 12, 13"), lty = 1, lwd = 1, col = c("black", "purple", "red", "blue"))

spec.pgram(as.vector(residuals(r_electr_auto_fossa)), taper = 0, log='no', fast = FALSE)

acf(as.vector(residuals(r_electr_auto_fossa)))

23 Пердсказание

Обрежем наш ряд на 2 периода:

ts_electr_sh <- ts(ts_electr_2[1:(length(ts_electr_2) - 2 * freq_12)], frequency = freq_12)

L_electr_sh <- length(ts_electr_sh) %/% 2 %/% 12 * 12
s_electr_sh <- ssa(ts_electr_sh, kind = "1d-ssa", svd.method = "svd", L = L_electr_sh)
plot(s_electr_sh, type = "vectors", idx = 1:20)

s_electr_sh_e <- eossa(s_electr_sh, nested.groups = list(c(1, 6, 11:13, 20), c(2:5, 7:10)), k = 7)

plot(s_electr_sh_e, type = "vectors", idx = 1:20)
## Warning in .contribution(x, idx, ...): Elementary matrices are not F-orthogonal
## (max F-cor is -0.0659). Contributions can be irrelevant

r_electr_sh_e <- reconstruct(s_electr_sh_e, 
                      groups = list(Trend = c(1:3), Seasonality = c(4:13, 20)))

plot(r_electr_sh_e, add.residuals = TRUE, add.original = TRUE,
     plot.method = "xyplot",
     superpose = TRUE, auto.key = list(columns = 2))

spec.pgram(as.vector(residuals(r_electr_sh_e)), taper = 0, log='no', fast = FALSE)

И предскажем сначала весь сигнал и сравним с рядом:

forecast.len <- 2 * freq_12
rfor_electr <- rforecast(s_electr_sh_e, groups = list(1:13),
                     len = forecast.len)
vfor_electr <- vforecast(s_electr_sh_e, groups = list(1:13),
                     len = forecast.len)
plot(ts_electr_2, type = "l")
lines(ts(c(rep(NA, length(ts_electr_sh)), rfor_electr), frequency = freq_12), type = "l", col = "blue")
lines(ts(c(rep(NA, length(ts_electr_sh)), vfor_electr), frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

plot(ts(ts_electr_2[(length(ts_electr_2) - 2 * freq_12 + 1):length(ts_electr_2)], frequency = freq_12), type = "l")
lines(ts(rfor_electr, frequency = freq_12), type = "l", col = "blue")
lines(ts(vfor_electr, frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

Теперь только тренд:

rfor_electr_tr <- rforecast(s_electr_sh_e, groups = list(1:3),
                     len = forecast.len)
vfor_electr_tr <- vforecast(s_electr_sh_e, groups = list(1:3),
                     len = forecast.len)
plot(r_electr_e$Trend, type = "l")
lines(ts(c(rep(NA, length(r_electr_sh_e$Trend)), rfor_electr_tr), frequency = freq_12), type = "l", col = "blue")
lines(ts(c(rep(NA, length(r_electr_sh_e$Trend)), vfor_electr_tr), frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

plot(ts(r_electr_e$Trend[(length(r_electr_e$Trend) - 2 * freq_12 + 1):length(r_electr_e$Trend)], frequency = freq_12), type = "l")
lines(ts(rfor_electr_tr, frequency = freq_12), type = "l", col = "blue")
lines(ts(vfor_electr_tr, frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

Наконец, период:

rfor_electr_se <- rforecast(s_electr_sh_e, groups = list(4:13),
                     len = forecast.len)
vfor_electr_se <- vforecast(s_electr_sh_e, groups = list(4:13),
                     len = forecast.len)
plot(r_electr_e$Seasonality, type = "l")
lines(ts(c(rep(NA, length(r_electr_sh_e$Seasonality)), rfor_electr_se), frequency = freq_12), type = "l", col = "blue")
lines(ts(c(rep(NA, length(r_electr_sh_e$Seasonality)), vfor_electr_se), frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

plot(ts(r_electr_e$Seasonality[(length(r_electr_e$Seasonality) - 2 * freq_12 + 1):length(r_electr_e$Seasonality)], frequency = freq_12), type = "l")
lines(ts(rfor_electr_se, frequency = freq_12), type = "l", col = "blue")
lines(ts(vfor_electr_se, frequency = freq_12), type = "l", col = "red")
legend(x = "bottom", c("SSA", "rforecast", "vforecast"), lty = 1, col = c("black", "blue", "red"))

23.1 ЛРФ прогноз

Возьмём корни для ЛРФ:

par_electr_sh <- parestimate(s_electr_sh_e, groups = list(c(1:13)), method = "esprit")
par_electr_sh
##    period     rate   |    Mod     Arg  |     Re        Im
##       Inf   0.001272 |  1.00127   0.00 |  1.00127   0.00000
##     5.995  -0.000437 |  0.99956   1.05 |  0.49900   0.86610
##    -5.995  -0.000437 |  0.99956  -1.05 |  0.49900  -0.86610
##    12.000  -0.000909 |  0.99909   0.52 |  0.86523   0.49955
##   -12.000  -0.000909 |  0.99909  -0.52 |  0.86523  -0.49955
##     4.002  -0.001734 |  0.99827   1.57 |  0.00079   0.99827
##    -4.002  -0.001734 |  0.99827  -1.57 |  0.00079  -0.99827
##     2.399  -0.003505 |  0.99650   2.62 | -0.86348   0.49741
##    -2.399  -0.003505 |  0.99650  -2.62 | -0.86348  -0.49741
##  2191.237  -0.021492 |  0.97874   0.00 |  0.97873   0.00281
## -2191.237  -0.021492 |  0.97874  -0.00 |  0.97873  -0.00281
##    13.036  -0.027175 |  0.97319   0.48 |  0.86231   0.45113
##   -13.036  -0.027175 |  0.97319  -0.48 |  0.86231  -0.45113

Построим формулу и прогноз на 10 периодов:

list_formula_electr_sh <- find_explicit_formula_conj(ts_electr_sh, roots = par_electr_sh$roots)
list_formula_electr_comp_sh <- compress_sincos_to_cos(list_formula_electr_sh)
create_str_formula_conj(list_formula_electr_comp_sh)
## [1] "x _n = (103750.2691) * 1.0013 ^n + (12505.7378) * 0.9996 ^n * cos(2 * pi * n / 5.9948 + (-1.437)) + (13019.0011) * 0.9991 ^n * cos(2 * pi * n / 11.9998 + (2.1302)) + (3969.6153) * 0.9983 ^n * cos(2 * pi * n / 4.002 + (0.3681)) + (4268.9226) * 0.9965 ^n * cos(2 * pi * n / 2.3991 + (-1.6031)) + (309252.6649) * 0.9787 ^n * cos(2 * pi * n / 2191.2371 + (-1.6793)) + (9263.2443) * 0.9732 ^n * cos(2 * pi * n / 13.0355 + (-0.7867))"
ts_electr_sh_formula <- ts(model_series_by_formula_conj(list_formula_electr_comp_sh, length(ts_electr_sh) + 10 * freq_12), frequency = 12)
plot(ts_electr_sh_formula, lty = 3, lwd = 2, col = "green")
lines(r_electr_sh_e$Trend + r_electr_sh_e$Seasonality, lty = 6, lwd = 2, col = "purple")
lines(ts_electr_sh, type = "l", col = "red")
legend(x="bottom", legend = c("original", "ssa", "formula (compressed)"), lty = c(1, 3), col = c("red", "purple", "green"))

Так как у нас только один корень, который соответствует экспоненциальному поведению, то логично, что ряд будет возрастать постепенно с медленным увеличением скорости роста, что и видно на графике.

24 Доверительные интервалы

Построим бутстреп-доверительные интервалы для нашего предсказания (предсказательный интервал):

plot(s_electr_sh, type = "vectors", idx = 1:20)

for_electr <- forecast(s_electr_sh, groups = list(c(1:13)),
                    method = "vector", h = 2 * freq_12,
                    interval = "prediction",
                    level=c(0.8, 0.95))
plot(for_electr)
lines(ts_electr_2,col="black")

plot(for_electr, xlim = c(20, 22))
lines(ts_electr_2,col="black")

s_electr_sh_60 <- ssa(ts_electr_sh, L = 60, svd.method = "svd")
plot(s_electr_sh_60, type = "vectors", idx = 1:20)

for_electr_60 <- forecast(s_electr_sh_60, groups = list(c(1:10)),
                    method = "vector", h = 2 * freq_12,
                    interval = "prediction",
                    level=c(0.8, 0.95))
plot(for_electr_60, xlim = c(20, 22))
lines(ts_electr_2,col="black")

s_electr_sh_48 <- ssa(ts_electr_sh, L = 48, svd.method = "svd")
plot(s_electr_sh_48, type = "vectors", idx = 1:20)

for_electr_48 <- forecast(s_electr_sh_48, groups = list(c(1:8)),
                    method = "vector", h = 2 * freq_12,
                    interval = "prediction",
                    level=c(0.8, 0.95))
plot(for_electr_48, xlim = c(20, 22))
lines(ts_electr_2,col="black")

Видим, что реальные значения ряда полностью попадают даже в \(80\%\) предсказательный интервал.

Теперь доверительный интервал:

for_electr <- forecast(s_electr_sh, groups = list(c(1:13)),
                    method = "vector", h = 2 * freq_12,
                    interval = "confidence",
                    level=c(0.8, 0.95))
plot(for_electr)
lines(ts_electr_2,col="black")

plot(for_electr, xlim = c(20, 22))
lines(ts_electr_2,col="black")

Здесь уже видим, что далеко не все точки попадают в предсказательный интервал.

25 Работа с пропусками

Сначала воспользуемся стандартным заполнением пропусков на основе разложения и предсказания:

ts_electr_hole <- ts_electr_2
ts_electr_hole[100:150] <- NA
plot(ts_electr_hole)

s_electr_hole <- ssa(ts_electr_hole, L = 60)

g_electr <- gapfill(s_electr_hole, groups = list(c(1, 6)), method = "sequential", 
             base = "reconstructed")
g0_electr <- gapfill(s_electr_hole, groups = list(c(1, 6)), method = "sequential", 
              alpha = 0, base = "reconstructed")
g1_electr <- gapfill(s_electr_hole, groups = list(c(1, 6)), method = "sequential", 
              alpha = 1, base = "reconstructed")
plot(ts_electr_2, col = "black")
lines(g0_electr, col = "blue", lwd = 2)
lines(g1_electr, col = "green", lwd = 2)
lines(g_electr, col = "red", lwd = 2)

g_electr_all <- gapfill(s_electr_hole, groups = list(c(1:13)), method = "sequential", 
             base = "reconstructed")
g0_electr_all <- gapfill(s_electr_hole, groups = list(c(1:13)), method = "sequential", 
              alpha = 0, base = "reconstructed")
g1_electr_all <- gapfill(s_electr_hole, groups = list(c(1:13)), method = "sequential", 
              alpha = 1, base = "reconstructed")
plot(ts_electr_2, col = "black")
lines(g0_electr_all, col = "blue", lwd = 2)
lines(g1_electr_all, col = "green", lwd = 2)
lines(g_electr_all, col = "red", lwd = 2)

Видим, что в месте, где у нас пропущены значения происходит скачок, поэтому восстановление слева выше реальных значений, а восстановление справа — ниже. Хотя в среднем значения достаточно близки к правде.

25.1 Итеративное заполнение

Рассмотрим итеративное заполнение пропусков:

ig_electr <- igapfill(s_electr_hole, groups = list(c(1, 6)), 
               base = "reconstructed")
igo_electr <- igapfill(s_electr_hole, groups = list(c(1, 6)), 
                base = "original")

plot(ts_electr_2, col="black")
lines(ig_electr, col = "blue", lwd = 1)
lines(igo_electr, col = "red", lwd = 1)

ig1_electr <- igapfill(s_electr_hole, groups = list(c(1, 6)), 
                base = "original", maxiter = 1)
ig5_electr <- igapfill(s_electr_hole, groups = list(c(1, 6)), fill = ig1_electr,
                base = "original", maxiter = 4)
ig10_electr <- igapfill(s_electr_hole, groups = list(c(1, 6)), fill = ig5_electr, 
                 base = "original", maxiter = 5)
init.lin <- ts_electr_2
init.lin[100:150] <- ts_electr_2[100] + (0:100) / 100 * (ts_electr_2[150] - ts_electr_2[100])
## Warning in NextMethod("[<-"): number of items to replace is not a multiple of
## replacement length
ig.lin_electr <- igapfill(s_electr_hole, 
                  fill = init.lin, 
                  groups = list(c(1, 6)), 
                  base = "original", maxiter = 10)
# Compare the result
plot(ts_electr_2, col = "black")
lines(ig1_electr, col = "green", lwd = 1)
lines(ig5_electr, col = "blue", lwd = 1)
lines(ig10_electr, col = "red", lwd = 1)
lines(ig.lin_electr, col = "darkred", lwd = 1)

Заполненный тренд становится более выгнутым, подстраиваясь под реальные данные.

25.2 Сравнение на синтетике

Посчитаем среднюю ошибку заполения в центре:

N_csn <- 252
ts_cos_s <- ts(exp(2 * (1:N_csn) / N_csn) + cos(2 * pi * (1:N_csn) / freq_12 + pi / 3), frequency = freq_12)
ts_cos_sn <- ts(ts_cos_s + 0.4 * rnorm(N_csn), frequency = freq_12)
plot(ts_cos_sn)
ts_cos_sn_hm <- ts_cos_sn
ts_cos_sn_hm[108:144] <- NA
lines(ts_cos_sn_hm, col = "red")

s_cos_sn_hm <- ssa(ts_cos_sn_hm, L = 5 * freq_12, svd.method = "svd")
plot(s_cos_sn_hm, type = "vectors", idx = 1:10)

g_cos_sn_hm <- gapfill(s_cos_sn_hm, groups = list(c(1, 2, 3)), method = "sequential", 
             base = "reconstructed")

ig_cos_sn_hm <- igapfill(s_cos_sn_hm, groups = list(c(1, 2, 3)), 
                base = "reconstructed")

plot(ts_cos_s, col="black")
lines(g_cos_sn_hm, col = "blue", lwd = 1)
lines(ig_cos_sn_hm, col = "red", lwd = 1)
legend(x = "topleft", c("original trend", "gapfill", "igapfill"), lty = 1, lwd = 1, col = c("black", "blue", "red"))

N_model <- 100
mse_g <- c()
mse_ig <- c()

for (i in 1:N_model) {
  ts_cos_s <- ts(exp(2 * (1:N_csn) / N_csn) + cos(2 * pi * (1:N_csn) / freq_12 + pi / 3), frequency = freq_12)
  ts_cos_sn <- ts(ts_cos_s + 0.4 * rnorm(N_csn), frequency = freq_12)
  ts_cos_sn_hm <- ts_cos_sn
  ts_cos_sn_hm[108:144] <- NA
  
  s_cos_sn_hm <- ssa(ts_cos_sn_hm, L = 5 * freq_12, svd.method = "svd")
  
  g_cos_sn_hm <- gapfill(s_cos_sn_hm, groups = list(c(1, 2, 3)), method = "sequential", 
             base = "reconstructed")

  ig_cos_sn_hm <- igapfill(s_cos_sn_hm, groups = list(c(1, 2, 3)), 
                  base = "reconstructed")
  
  mse_g <- c(mse_g, mean((ts_cos_s[108:144] - g_cos_sn_hm[108:144]) ** 2))
  mse_ig <- c(mse_ig, mean((ts_cos_s[108:144] - ig_cos_sn_hm[108:144]) ** 2))
}

cat("gapfill ", mean(mse_g), "\n")
## gapfill  0.008221212
cat("iterative gapfill ", mean(mse_ig), "\n")
## iterative gapfill  0.003989773

Итеративный способ лучше:

t.test(mse_g, mse_ig)
## 
##  Welch Two Sample t-test
## 
## data:  mse_g and mse_ig
## t = 6.8524, df = 170.4, p-value = 1.271e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.003012474 0.005450403
## sample estimates:
##   mean of x   mean of y 
## 0.008221212 0.003989773

А теперь на краях:

plot(ts_cos_sn)
ts_cos_sn_he <- ts_cos_sn
ts_cos_sn_he[216:252] <- NA
lines(ts_cos_sn_he, col = "red")

s_cos_sn_he <- ssa(ts_cos_sn_he, L = 5 * freq_12, svd.method = "svd")
plot(s_cos_sn_he, type = "vectors", idx = 1:10)

g_cos_sn_he <- gapfill(s_cos_sn_he, groups = list(c(1, 2, 3)), method = "sequential", 
             base = "reconstructed")

ig_cos_sn_he <- igapfill(s_cos_sn_he, groups = list(c(1, 2, 3)), 
                base = "reconstructed")

plot(ts_cos_s, col="black")
lines(g_cos_sn_he, col = "blue", lwd = 1)
lines(ig_cos_sn_he, col = "red", lwd = 1)
legend(x = "topleft", c("original trend", "gapfill", "igapfill"), lty = 1, lwd = 1, col = c("black", "blue", "red"))

N_model <- 100
mse_g_e <- c()
mse_ig_e <- c()

for (i in 1:N_model) {
  ts_cos_s <- ts(exp(2 * (1:N_csn) / N_csn) + cos(2 * pi * (1:N_csn) / freq_12 + pi / 3), frequency = freq_12)
  ts_cos_sn <- ts(ts_cos_s + 0.4 * rnorm(N_csn), frequency = freq_12)
  ts_cos_sn_he <- ts_cos_sn
  ts_cos_sn_he[216:252] <- NA
  
  s_cos_sn_he <- ssa(ts_cos_sn_he, L = 5 * freq_12, svd.method = "svd")
  
  g_cos_sn_he <- gapfill(s_cos_sn_he, groups = list(c(1, 2, 3)), method = "sequential", 
             base = "reconstructed")

  ig_cos_sn_he <- igapfill(s_cos_sn_he, groups = list(c(1, 2, 3)), 
                  base = "reconstructed")
  
  mse_g_e <- c(mse_g_e, mean((ts_cos_s[216:252] - g_cos_sn_he[216:252]) ** 2))
  mse_ig_e <- c(mse_ig_e, mean((ts_cos_s[216:252] - ig_cos_sn_he[216:252]) ** 2))
}

cat("gapfill ", mean(mse_g_e), "\n")
## gapfill  0.01978561
cat("iterative gapfill ", mean(mse_ig_e), "\n")
## iterative gapfill  0.02124055

Лучше обычный gapfill (правда, не значимо):

t.test(mse_g_e, mse_ig_e)
## 
##  Welch Two Sample t-test
## 
## data:  mse_g_e and mse_ig_e
## t = -0.56683, df = 193.53, p-value = 0.5715
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006517441  0.003607571
## sample estimates:
##  mean of x  mean of y 
## 0.01978561 0.02124055

26 Авторегрессия

26.1 Проверка оценки параметров

Смоделируем ARIMA ряды с разными параметрами. Для начала чисто авторегрессионный ряд первого порядка:

set.seed(42)
ts_sim_100 <- arima.sim(list(order = c(1,0,0), ar = 0.7), n = 1000)
plot(ts_sim_100)

acf(ts_sim_100, lag.max = 50)

pacf(ts_sim_100)

auto.arima(ts_sim_100)
## Series: ts_sim_100 
## ARIMA(1,0,0) with zero mean 
## 
## Coefficients:
##          ar1
##       0.6944
## s.e.  0.0230
## 
## sigma^2 = 1.009:  log likelihood = -1423.06
## AIC=2850.12   AICc=2850.13   BIC=2859.94

Видим характерное поведение ACF и PACF, а также верная оценка по auto.arima. Теперь модель скользящего среднего первого порядка:

set.seed(42)
ts_sim_001 <- arima.sim(list(order = c(0,0,1), ma = 0.7), n = 1000)
plot(ts_sim_001)

acf(ts_sim_001, lag.max = 100)

pacf(ts_sim_001)

auto.arima(ts_sim_001)
## Series: ts_sim_001 
## ARIMA(0,0,1) with zero mean 
## 
## Coefficients:
##          ma1
##       0.7070
## s.e.  0.0223
## 
## sigma^2 = 1.009:  log likelihood = -1423.13
## AIC=2850.25   AICc=2850.26   BIC=2860.07

Смоделируем что-то посложнее:

set.seed(42)
ts_sim_523 <- arima.sim(list(order = c(5, 2, 3), ar = c(0.28, -0.2, -0.3, 0.1, 0.4), ma = c(0.7, 0.8, -0.3)), n = 1000)
plot(ts_sim_523)

plot(ts_sim_523 |> diff() |> diff())

acf(ts_sim_523 |> diff() |> diff(), lag.max = 100)

pacf(ts_sim_523 |> diff() |> diff())

auto.arima(ts_sim_523)
## Series: ts_sim_523 
## ARIMA(5,2,3) 
## 
## Coefficients:
##          ar1      ar2      ar3     ar4     ar5     ma1     ma2      ma3
##       0.2380  -0.2210  -0.2826  0.0634  0.3951  0.6843  0.7414  -0.2168
## s.e.  0.0788   0.0332   0.0418  0.0484  0.0314  0.0855  0.0752   0.0818
## 
## sigma^2 = 1.172:  log likelihood = -1498.6
## AIC=3015.2   AICc=3015.39   BIC=3059.37

Попробуем проанализировать некоторые примеры рядов:

ts4 <- ts(read_csv("arima_model/ts4.csv"))
## Rows: 3001 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): x
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ts8 <- ts(read_csv("arima_model/ts8.csv"))
## Rows: 1000 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): x
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Для данного ряда тест Dickey-Fuller говорит о том, что стационарность есть, однако auto.arima определяет порядок дифференцирования \(1\), поэтому выберем параметр \(d = 1\):

plot(ts4)

adf.test(ts4)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts4
## Dickey-Fuller = -3.5558, Lag order = 14, p-value = 0.0369
## alternative hypothesis: stationary
plot(ts4 |> diff())

acf(ts4 |> diff(), lag.max = 100)

pacf(ts4 |> diff())

auto.arima(ts4)
## Series: ts4 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1     ma1
##       0.0318  0.7992
## s.e.  0.0225  0.0135
## 
## sigma^2 = 1.004:  log likelihood = -4261.67
## AIC=8529.35   AICc=8529.35   BIC=8547.36
(fit4 <- Arima(ts4, order=c(0,1,1)))
## Series: ts4 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.8100
## s.e.  0.0104
## 
## sigma^2 = 1.004:  log likelihood = -4262.67
## AIC=8529.34   AICc=8529.35   BIC=8541.35
plot(fit4)

plot(forecast(fit4, h = 50), xlim = c(2900, 3060))

Для ряда ниже получили модель авторегрессии третьего порядка:

plot(ts8)

adf.test(ts8)
## Warning in adf.test(ts8): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts8
## Dickey-Fuller = -10.044, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
acf(ts8, lag.max = 100)

pacf(ts8)

auto.arima(ts8)
## Series: ts8 
## ARIMA(3,0,0) with zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3
##       0.2085  0.1075  -0.0770
## s.e.  0.0315  0.0321   0.0316
## 
## sigma^2 = 1.913:  log likelihood = -1741.9
## AIC=3491.81   AICc=3491.85   BIC=3511.44
(fit8 <- Arima(ts8, order=c(3,0,0)))
## Series: ts8 
## ARIMA(3,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2      ar3     mean
##       0.2072  0.1064  -0.0783  -0.0650
## s.e.  0.0315  0.0321   0.0316   0.0571
## 
## sigma^2 = 1.913:  log likelihood = -1741.26
## AIC=3492.52   AICc=3492.58   BIC=3517.06
plot(fit8)

plot(forecast(fit8, h = 10), xlim = c(990, 1020))

26.2 Реальный ряд

Определим параметры ARIMA для данных с электричеством (без сезонности) и построим предсказание с доверительными интервалами:

ts_electr_sh_tr <- ts(ts_electr_sh - r_electr_sh_e$Seasonality)
plot(ts_electr_sh_tr)

acf(ts_electr_sh_tr |> diff())

pacf(ts_electr_sh_tr |> diff())

auto.arima(ts_electr_sh_tr, stepwise = FALSE, trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(0,1,0)                    : 4431.404
##  ARIMA(0,1,0) with drift         : 4432.508
##  ARIMA(0,1,1)                    : 4384.042
##  ARIMA(0,1,1) with drift         : 4380.795
##  ARIMA(0,1,2)                    : 4386.096
##  ARIMA(0,1,2) with drift         : 4382.652
##  ARIMA(0,1,3)                    : 4387.473
##  ARIMA(0,1,3) with drift         : 4382.999
##  ARIMA(0,1,4)                    : 4389.478
##  ARIMA(0,1,4) with drift         : 4385.062
##  ARIMA(0,1,5)                    : 4390.834
##  ARIMA(0,1,5) with drift         : 4385.707
##  ARIMA(1,1,0)                    : 4393.07
##  ARIMA(1,1,0) with drift         : 4392.754
##  ARIMA(1,1,1)                    : 4387.176
##  ARIMA(1,1,1) with drift         : 4383.606
##  ARIMA(1,1,2)                    : Inf
##  ARIMA(1,1,2) with drift         : 4384.993
##  ARIMA(1,1,3)                    : Inf
##  ARIMA(1,1,3) with drift         : Inf
##  ARIMA(1,1,4)                    : Inf
##  ARIMA(1,1,4) with drift         : Inf
##  ARIMA(2,1,0)                    : 4391.875
##  ARIMA(2,1,0) with drift         : 4390.799
##  ARIMA(2,1,1)                    : 4389.204
##  ARIMA(2,1,1) with drift         : 4384.253
##  ARIMA(2,1,2)                    : Inf
##  ARIMA(2,1,2) with drift         : Inf
##  ARIMA(2,1,3)                    : Inf
##  ARIMA(2,1,3) with drift         : Inf
##  ARIMA(3,1,0)                    : 4389.396
##  ARIMA(3,1,0) with drift         : 4387.276
##  ARIMA(3,1,1)                    : 4391.555
##  ARIMA(3,1,1) with drift         : 4386.813
##  ARIMA(3,1,2)                    : 4381.093
##  ARIMA(3,1,2) with drift         : Inf
##  ARIMA(4,1,0)                    : 4392.356
##  ARIMA(4,1,0) with drift         : 4390.384
##  ARIMA(4,1,1)                    : Inf
##  ARIMA(4,1,1) with drift         : 4389.07
##  ARIMA(5,1,0)                    : 4389.293
##  ARIMA(5,1,0) with drift         : 4385.331
## 
##  Now re-fitting the best model(s) without approximations...
## 
## 
## 
## 
##  Best model: ARIMA(0,1,1) with drift
## Series: ts_electr_sh_tr 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##           ma1     drift
##       -0.5323  288.2537
## s.e.   0.0621  119.8871
## 
## sigma^2 = 14894480:  log likelihood = -2195.88
## AIC=4397.77   AICc=4397.88   BIC=4408.04
(fit_elect_arima_tr <- Arima(ts_electr_sh_tr, order=c(0,1,1), include.drift = TRUE))
## Series: ts_electr_sh_tr 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##           ma1     drift
##       -0.5323  288.2537
## s.e.   0.0621  119.8871
## 
## sigma^2 = 14894480:  log likelihood = -2195.88
## AIC=4397.77   AICc=4397.88   BIC=4408.04
plot(forecast(fit_elect_arima_tr, h = 2 * freq_12))
lines(ts(ts_electr_2 - r_electr_e$Seasonality))

plot(forecast(fit_elect_arima_tr, h = 2 * freq_12), xlim = c(200, 260))
lines(ts(ts_electr_2 - r_electr_e$Seasonality))

Добавим сезонность и применим Seasonal ARIMA:

plot(ts_electr_sh)

auto.arima(ts_electr_sh, stepwise = FALSE, trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(0,1,0)(0,1,0)[12]                    : 4126.288
##  ARIMA(0,1,0)(0,1,1)[12]                    : 4038.829
##  ARIMA(0,1,0)(0,1,2)[12]                    : 4040.837
##  ARIMA(0,1,0)(1,1,0)[12]                    : 4078.156
##  ARIMA(0,1,0)(1,1,1)[12]                    : 4038.75
##  ARIMA(0,1,0)(1,1,2)[12]                    : 4039.319
##  ARIMA(0,1,0)(2,1,0)[12]                    : 4061.196
##  ARIMA(0,1,0)(2,1,1)[12]                    : 4043.188
##  ARIMA(0,1,0)(2,1,2)[12]                    : 4037.291
##  ARIMA(0,1,1)(0,1,0)[12]                    : 4098.766
##  ARIMA(0,1,1)(0,1,1)[12]                    : 4022.407
##  ARIMA(0,1,1)(0,1,2)[12]                    : 4024.483
##  ARIMA(0,1,1)(1,1,0)[12]                    : 4056.01
##  ARIMA(0,1,1)(1,1,1)[12]                    : 4020.022
##  ARIMA(0,1,1)(1,1,2)[12]                    : 4021.072
##  ARIMA(0,1,1)(2,1,0)[12]                    : 4039.773
##  ARIMA(0,1,1)(2,1,1)[12]                    : 4020.581
##  ARIMA(0,1,1)(2,1,2)[12]                    : 4013.566
##  ARIMA(0,1,2)(0,1,0)[12]                    : 4095.577
##  ARIMA(0,1,2)(0,1,1)[12]                    : 4019.584
##  ARIMA(0,1,2)(0,1,2)[12]                    : 4021.655
##  ARIMA(0,1,2)(1,1,0)[12]                    : 4051.194
##  ARIMA(0,1,2)(1,1,1)[12]                    : 4020.562
##  ARIMA(0,1,2)(1,1,2)[12]                    : 4020.725
##  ARIMA(0,1,2)(2,1,0)[12]                    : 4035.716
##  ARIMA(0,1,2)(2,1,1)[12]                    : 4016.642
##  ARIMA(0,1,3)(0,1,0)[12]                    : 4097.653
##  ARIMA(0,1,3)(0,1,1)[12]                    : 4017.144
##  ARIMA(0,1,3)(0,1,2)[12]                    : 4019.229
##  ARIMA(0,1,3)(1,1,0)[12]                    : 4052.301
##  ARIMA(0,1,3)(1,1,1)[12]                    : 4021.231
##  ARIMA(0,1,3)(2,1,0)[12]                    : 4030.255
##  ARIMA(0,1,4)(0,1,0)[12]                    : 4091.831
##  ARIMA(0,1,4)(0,1,1)[12]                    : 4015.122
##  ARIMA(0,1,4)(1,1,0)[12]                    : 4045.984
##  ARIMA(0,1,5)(0,1,0)[12]                    : 4088.909
##  ARIMA(1,1,0)(0,1,0)[12]                    : 4111.161
##  ARIMA(1,1,0)(0,1,1)[12]                    : 4028.755
##  ARIMA(1,1,0)(0,1,2)[12]                    : 4030.765
##  ARIMA(1,1,0)(1,1,0)[12]                    : 4064.662
##  ARIMA(1,1,0)(1,1,1)[12]                    : 4022.238
##  ARIMA(1,1,0)(1,1,2)[12]                    : 4023.962
##  ARIMA(1,1,0)(2,1,0)[12]                    : 4045.793
##  ARIMA(1,1,0)(2,1,1)[12]                    : 4029.713
##  ARIMA(1,1,0)(2,1,2)[12]                    : 4022.232
##  ARIMA(1,1,1)(0,1,0)[12]                    : 4088.133
##  ARIMA(1,1,1)(0,1,1)[12]                    : 4014.248
##  ARIMA(1,1,1)(0,1,2)[12]                    : 4016.285
##  ARIMA(1,1,1)(1,1,0)[12]                    : 4040.518
##  ARIMA(1,1,1)(1,1,1)[12]                    : 4005.488
##  ARIMA(1,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(2,1,0)[12]                    : 4029.036
##  ARIMA(1,1,1)(2,1,1)[12]                    : 4013.708
##  ARIMA(1,1,2)(0,1,0)[12]                    : 4088.527
##  ARIMA(1,1,2)(0,1,1)[12]                    : 4016.222
##  ARIMA(1,1,2)(0,1,2)[12]                    : 4018.276
##  ARIMA(1,1,2)(1,1,0)[12]                    : 4039.824
##  ARIMA(1,1,2)(1,1,1)[12]                    : 3999.181
##  ARIMA(1,1,2)(2,1,0)[12]                    : 4031.1
##  ARIMA(1,1,3)(0,1,0)[12]                    : Inf
##  ARIMA(1,1,3)(0,1,1)[12]                    : 4017.968
##  ARIMA(1,1,3)(1,1,0)[12]                    : Inf
##  ARIMA(1,1,4)(0,1,0)[12]                    : 4088.167
##  ARIMA(2,1,0)(0,1,0)[12]                    : 4099.598
##  ARIMA(2,1,0)(0,1,1)[12]                    : 4028.851
##  ARIMA(2,1,0)(0,1,2)[12]                    : 4030.944
##  ARIMA(2,1,0)(1,1,0)[12]                    : 4056.21
##  ARIMA(2,1,0)(1,1,1)[12]                    : 4016.558
##  ARIMA(2,1,0)(1,1,2)[12]                    : 4018.461
##  ARIMA(2,1,0)(2,1,0)[12]                    : 4044.096
##  ARIMA(2,1,0)(2,1,1)[12]                    : 4028.23
##  ARIMA(2,1,1)(0,1,0)[12]                    : 4090.718
##  ARIMA(2,1,1)(0,1,1)[12]                    : 4018.898
##  ARIMA(2,1,1)(0,1,2)[12]                    : 4020.954
##  ARIMA(2,1,1)(1,1,0)[12]                    : 4041.251
##  ARIMA(2,1,1)(1,1,1)[12]                    : 4002.829
##  ARIMA(2,1,1)(2,1,0)[12]                    : 4026.985
##  ARIMA(2,1,2)(0,1,0)[12]                    : Inf
##  ARIMA(2,1,2)(0,1,1)[12]                    : 4019.774
##  ARIMA(2,1,2)(1,1,0)[12]                    : Inf
##  ARIMA(2,1,3)(0,1,0)[12]                    : 4089.87
##  ARIMA(3,1,0)(0,1,0)[12]                    : 4102.033
##  ARIMA(3,1,0)(0,1,1)[12]                    : 4031.521
##  ARIMA(3,1,0)(0,1,2)[12]                    : 4033.634
##  ARIMA(3,1,0)(1,1,0)[12]                    : 4059.055
##  ARIMA(3,1,0)(1,1,1)[12]                    : 4014.626
##  ARIMA(3,1,0)(2,1,0)[12]                    : 4046.664
##  ARIMA(3,1,1)(0,1,0)[12]                    : 4092.144
##  ARIMA(3,1,1)(0,1,1)[12]                    : 4022.652
##  ARIMA(3,1,1)(1,1,0)[12]                    : 4042.467
##  ARIMA(3,1,2)(0,1,0)[12]                    : 4093.228
##  ARIMA(4,1,0)(0,1,0)[12]                    : 4104.442
##  ARIMA(4,1,0)(0,1,1)[12]                    : 4032.715
##  ARIMA(4,1,0)(1,1,0)[12]                    : 4058.912
##  ARIMA(4,1,1)(0,1,0)[12]                    : 4090.616
##  ARIMA(5,1,0)(0,1,0)[12]                    : 4104.773
## 
##  Now re-fitting the best model(s) without approximations...
## 
## 
## 
## 
##  Best model: ARIMA(1,1,2)(1,1,1)[12]
## Series: ts_electr_sh 
## ARIMA(1,1,2)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1     ma2    sar1     sma1
##       0.6347  -1.0023  0.1123  0.0636  -0.8325
## s.e.  0.1396   0.1574  0.1135  0.1000   0.0933
## 
## sigma^2 = 19234185:  log likelihood = -2112.38
## AIC=4236.76   AICc=4237.17   BIC=4256.99
(fit_elect_arima <- Arima(ts_electr_sh, order=c(1,1,2), seasonal = c(1,1,1)))
## Series: ts_electr_sh 
## ARIMA(1,1,2)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1     ma2    sar1     sma1
##       0.6347  -1.0023  0.1123  0.0636  -0.8325
## s.e.  0.1396   0.1574  0.1135  0.1000   0.0933
## 
## sigma^2 = 19234185:  log likelihood = -2112.38
## AIC=4236.76   AICc=4237.17   BIC=4256.99
f_elect_arima <- forecast(fit_elect_arima, h = 2 * freq_12)

plot(f_elect_arima)
lines(ts_electr_2)

plot(f_elect_arima, xlim = c(20, 22))
lines(ts_electr_2)

Вычислим ошибки предсказания для SSA и ARIMA:

f_part_electr <- ts_electr_2[(length(ts_electr_2) - 2 * freq_12 + 1):length(ts_electr_2)]
cat("SSA реккурентное предсказание: ", mean((rfor_electr - f_part_electr) ** 2), "\n")
## SSA реккурентное предсказание:  11806850
cat("SSA векторное предсказание: ", mean((vfor_electr - f_part_electr) ** 2), "\n")
## SSA векторное предсказание:  11864546
cat("Seasonal ARIMA предсказание", mean((f_elect_arima$mean - f_part_electr) ** 2), "\n")
## Seasonal ARIMA предсказание 21289575

27 Обнаружение разладки

27.1

Сгенерируем ряд с временной разладкой:

N_temp <- 252
ts_temp_break <- ts(c(1:(N_temp %/% 2), N_temp %/% 2 - 1:(N_temp %/% 2)) + rnorm(N_temp))
plot(ts_temp_break)

Построим матрицы неоднородности для разных длин окон:

rank_temp <- 1
M_temp <- 48; L_temp <- M_temp / 2
hm48_temp <- hmatr(ts_temp_break, B = M_temp, T = M_temp, L = L_temp, neig = rank_temp)
plot(hm48_temp)

Наконец, нарисуем функции разладки для разных методов перемещения окон:

path_hor_temp <- c()
path_diag_temp <- c()
path_sdiag_temp <- c()

for (i in 1:ncol(hm48_temp)) {
  path_hor_temp <- c(path_hor_temp, hm48_temp[i, 20])
  path_diag_temp <- c(path_diag_temp, hm48_temp[i, i])
  
  if (i + 60 < ncol(hm48_temp))
    path_sdiag_temp <- c(path_sdiag_temp, hm48_temp[i + 60, i])
}

par(mfrow = c(4, 1))
plot(path_hor_temp, type = "l", main = "Horizontal")
plot(path_diag_temp, type = "l", main = "Diagonal")
plot(c(rep(NA, 30), path_sdiag_temp, rep(NA, 30)), type = "l", main = "Conjugated")
plot(ts_temp_break, main = "Series")

27.2 Постоянная разладка

Сгенерируем ряд со следующей разладкой: сначала сумма косинусов с периодами \(7\) и \(12\), а потом остаётся только косинус с периодом \(12\):

N_cos_break <- 252
ts_cos_break <- ts(c(1 / 2 * cos(2 * pi * ((N_cos_break %/% 2 + 1):N_cos_break) / 12) + 1 / 2 * cos(2 * pi * ((N_cos_break %/% 2 + 1):N_cos_break) / 7), cos(2 * pi * (1:(N_cos_break %/% 2)) / 12)))
plot(ts_cos_break)

Построим матрицы неоднородности для разных длин окон:

rank_cos_break <- 4
M_cos_break <- 48; L_cos_break <- M_cos_break / 2
hm48_cos_break <- hmatr(ts_cos_break, B = M_cos_break, T = M_cos_break, L = L_cos_break, neig = rank_cos_break)
plot(hm48_cos_break)

Наконец, нарисуем функции разладки для разных методов перемещения окон:

path_hor_cos_break <- c()
path_diag_cos_break <- c()
path_sdiag_cos_break <- c()

for (i in 1:ncol(hm48_cos_break)) {
  path_hor_cos_break <- c(path_hor_cos_break, hm48_cos_break[i, 20])
  path_diag_cos_break <- c(path_diag_cos_break, hm48_cos_break[i, i])
  
  if (i + 60 < ncol(hm48_cos_break))
    path_sdiag_cos_break <- c(path_sdiag_cos_break, hm48_cos_break[i + 60, i])
}

par(mfrow = c(4, 1))
plot(path_hor_cos_break, type = "l", main = "Horizontal")
plot(path_diag_cos_break, type = "l", main = "Diagonal")
plot(c(rep(NA, 30), path_sdiag_cos_break, rep(NA, 30)), type = "l", main = "Conjugated")
plot(ts_cos_break, main = "Series")

27.3 Реальный ряд

Применим матрицу неоднородности к реальному ряду:

rank_electr <- 10
M_electr <- 48; L_electr <- M_electr / 2
hm48_electr <- hmatr(ts_electr_2, B = M_electr, T = M_electr, L = L_electr, neig = rank_electr)
plot(hm48_electr)

Нарисуем функции разладки для разных методов перемещения окон:

path_hor_electr <- c()
path_diag_electr <- c()
path_sdiag_electr <- c()

for (i in 1:ncol(hm48_electr)) {
  path_hor_electr <- c(path_hor_electr, hm48_electr[i, 20])
  path_diag_electr <- c(path_diag_electr, hm48_electr[i, i])
  
  if (i + 60 < ncol(hm48_electr))
    path_sdiag_electr <- c(path_sdiag_electr, hm48_electr[i + 60, i])
}

par(mfrow = c(4, 1))
plot(path_hor_electr, type = "l", main = "Horizontal")
plot(path_diag_electr, type = "l", main = "Diagonal")
plot(c(rep(NA, 30), path_sdiag_electr, rep(NA, 30)), type = "l", main = "Conjugated")
plot(ts_electr_2, main = "Series")